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FIG. 3 



specifier = address + (size/2) + (width/2)



width = 16 bytes 



address 

size/2 

width/2 




depth = 4 bytes 



size = depth x width = 64 bytes



address is aligned to size (64 bytes),
so low-order 6 bits are zero



aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 



000000 \ 



ooooooooooooooooooooooooooooooooo l 100000 | 



000000000000000000000000000000000 \ 001000 l 



specifier I aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 1 101000 I 
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500 S 505 a/r* c 510 
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specifier I aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 1 10W00 \ 
i <_ — r 



610 



600- 



605 



615- 



s and (0-s) I 

T 



width/2 000000000000000000000000000000000 001000 

s — 



620 



625-^ \ s and no t (width/2)\ 
aaaaaaaaaaaaaaaaaaaaaaaaaaaagaaaaaaaa \ 100000 \ 



£J0 



6J5- 



f o/jtf (0-t) 



size/2 \ 000000000000000000000000000000000 1 100000 1 
^ I < 

f and not (size/2) I 
' j 



address 



aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 
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F/G. 0 
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Wide operand specifier 



Memory 
Memory width- 



Register operand 



Register operand 



Portion 0 



Portion 1 



Portion 2 



Portion 3 



Portion 4 



Portion 5 



Portion 6 



Portion 7 



Function 



Function unit with dedicated storage 



Result 
Register width\ - 
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operand 
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Qwmc.c contents 



wmc.pa - physical address
wmc.size - size of contents
wmc.cv - contents valid
wmc.th - thread last used
wmc.reg - register last used
wmc.rtv - register & thread valid
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Operation codes 



W.SWITCH.B 


Wide switch big-endian 


W.SWITCH.L 


Wide switch little-endian 



Selection 



class 


op 


order 


Wide switch 


W.SWITCH 


B L 



Format 

W.op.order ra=rc,rd,rb 

ra=woporder(rc,rd,rb) 

31 24 23 

1 W.op.order 
8 



18 17 



12 11 



6 5 



rd 



rc 



rb 



ra 



FIG. 12A 
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Wide Switch 



FIG. 12B 
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Definition 

defWideSwitch(op,rd,rc,rb,ra) 

d ← RegRead(rd, 128)

c ← RegRead(rc, 64)

b ← RegRead(rb, 128)

if c_{1..0} ≠ 0 then

raise AccessDisallowedByVirtualAddress

elseif c_{6..0} ≠ 0 then

VirtAddr ← c and (c-1)

w ← wsize ← (c and (0-c)) || 0^{1}

else

VirtAddr ← c

w ← wsize ← 128

endif 

msize ← 8*wsize
lwsize ← log(wsize)

case op of

W.SWITCH.B:

order ← B
W.SWITCH.L:
order ← L

endcase

m ← LoadMemory(c,VirtAddr,msize,order)

db ← d || b
for i ← 0 to 127

j ← 0 || i_{lwsize-1..0}

k-*- m 7 ^||m6«w + jllm5. w+ j||m4. w+ jl|m3' W+ jllm2«w + jll 

1 i7.1 wsize II jlwsize-1..0 

endfor 

RegWrite(ra, 128, a) 
enddef 



FIG. 12C 



^1280 

t 



Exceptions 

Access disallowed by virtual address 
Access disallowed by tag 
Access disallowed by global TB 
Access disallowed by local TB 
Access detail required by tag 
Access detail required by local TB 
Access detail required by global TB 
Local TB miss 
Global TB miss 
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Operation codes 



W.TRANSLATE.8.B


Wide translate bytes big-endian


W.TRANSLATE.16.B


Wide translate doublets big-endian


W.TRANSLATE.32.B


Wide translate quadlets big-endian


W.TRANSLATE.64.B


Wide translate octlets big-endian


W.TRANSLATE.8.L


Wide translate bytes little-endian


W.TRANSLATE.16.L


Wide translate doublets little-endian


W.TRANSLATE.32.L


Wide translate quadlets little-endian


W.TRANSLATE.64.L | Wide translate octlets little-endian



Selection 



class 


size 


order 


Wide translate 


8 16 32 64 


B L 



Format 

W.TRANSLATE.size.order rd=rc,rb 

rd=wtranslatesizeorder(rc,rb) 

31 2434 1817 



1211 



65 



21 0 



W.TRANSLATE.order 



rd 



rc 



rb 



sz 



sz ← log(size) - 3



FIG. 13A 



vsize 



g size 



w size 
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Wide translate: 16 entries by 64 bits 



FIG. 13B 



1350 

f 

Definition 

def WideTranslate(op,gsize,rd,rc,rb)

c ← RegRead(rc, 64)

b ← RegRead(rb, 128)

lgsize ← log(gsize)

if c_{lgsize-4..0} ≠ 0 then

raise AccessDisallowedByVirtualAddress

endif

if c_{4..lgsize-3} ≠ 0 then

wsize ← (c and (0-c)) || 0^{3}
t ← c and (c-1)

else

wsize ← 128
t ← c

endif

lwsize ← log(wsize)

if t_{lwsize+4..lwsize-2} ≠ 0 then
msize ← (t and (0-t)) || 0^{4}

VirtAddr ← t and (t-1)

else

msize ← 256*wsize
VirtAddr ← t

endif 

case op of 

W.TRANSLATE.B:

order ← B
W.TRANSLATE.L:

order ← L

endcase

m ← LoadMemory(c,VirtAddr,msize,order)

vsize ← msize/wsize

lvsize ← log(vsize)

for i ← 0 to 128-gsize by gsize

j ← ((order=B)^{lvsize} ^ (b_{lvsize-1+i..i}))*wsize + i_{lwsize-1..0}

a_{gsize-1+i..i} ← m_{j+gsize-1..j}

endfor 

RegWrite(rd, 128, a) 
enddef 
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Exceptions 

Access disallowed by virtual address 
Access disallowed by tag 
Access disallowed by global TB 
Access disallowed by local TB 
Access detail required by tag 
Access detail required by local TB 
Access detail required by global TB 
Local TB miss 
Global TB miss 
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Dnpration coripQ 


p> — l*HU 


W.MUL.MAT.8.B


Wide multiply matrix signed byte big-endian


W.MUL.MAT.8.L


Wide multiply matrix signed byte little-endian


W.MUL.MAT.16.B


Wide multiply matrix signed doublet big-endian


W.MUL.MAT.16.L


Wide multiply matrix signed doublet little-endian


W.MUL.MAT.32.B


Wide multiply matrix signed quadlet big-endian


W.MUL.MAT.32.L


Wide multiply matrix signed quadlet little-endian


W.MUL.MAT.C.8.B


Wide multiply matrix signed complex byte big-endian


W.MUL.MAT.C.8.L


Wide multiply matrix signed complex byte little-endian


W.MUL.MAT.C.16.B


Wide multiply matrix signed complex doublet big-endian


W.MUL.MAT.C.16.L


Wide multiply matrix signed complex doublet little-endian


W.MUL.MAT.M.8.B


Wide multiply matrix mixed-signed byte big-endian


W.MUL.MAT.M.8.L


Wide multiply matrix mixed-signed byte little-endian


W.MUL.MAT.M.16.B


Wide multiply matrix mixed-signed doublet big-endian


W.MUL.MAT.M.16.L


Wide multiply matrix mixed-signed doublet little-endian


W.MUL.MAT.M.32.B


Wide multiply matrix mixed-signed quadlet big-endian


W.MUL.MAT.M.32.L


Wide multiply matrix mixed-signed quadlet little-endian


W.MUL.MAT.P.8.B


Wide multiply matrix polynomial byte big-endian


W.MUL.MAT.P.8.L


Wide multiply matrix polynomial byte little-endian


W.MUL.MAT.P.16.B


Wide multiply matrix polynomial doublet big-endian


W.MUL.MAT.P.16.L


Wide multiply matrix polynomial doublet little-endian


W.MUL.MAT.P.32.B


Wide multiply matrix polynomial quadlet big-endian


W.MUL.MAT.P.32.L


Wide multiply matrix polynomial quadlet little-endian


W.MUL.MAT.U.8.B


Wide multiply matrix unsigned byte big-endian


W.MUL.MAT.U.8.L


Wide multiply matrix unsigned byte little-endian


W.MUL.MAT.U.16.B


Wide multiply matrix unsigned doublet big-endian


W.MUL.MAT.U.16.L


Wide multiply matrix unsigned doublet little-endian


W.MUL.MAT.U.32.B


Wide multiply matrix unsigned quadlet big-endian


W.MUL.MAT.U.32.L


Wide multiply matrix unsigned quadlet little-endian



Selection 



class 


op 


type 


size 


order 


multiply 


W.MULMAT 


NONE M U P


8 16 32 


B 










L 






C 


8 16 


B 










L 



Format 

W.op.size.order rd=rc,rb 
rd=wopsizeorder(rc,rb) 
31 2423 



1817 



1211 



W.MINOR.order 



65 



21 0 



rd 



rc 



rb 



8 

sz-*- log(size) - 3 



W.op 



sz 
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m[rcl(128*6 
511 



/size) 



127 



rb(128) 




rd(128) 0 
Wide multiply matrix 



FIG. 14B 
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Definition * 
def mul(size,h,vs,v,i,ws,w,j) as

mul ← ((vs&v_{size-1+i})^{h-size} || v_{size-1+i..i}) * ((ws&w_{size-1+j})^{h-size} || w_{size-1+j..j})
enddef

def c ← PolyMultiply(size,a,b) as
p[0] ← 0^{2*size}

for k ← 0 to size-1
p[k+1] ← p[k] ^ a_{k} ? (0^{size-k} || b || 0^{k}) : 0^{2*size}

endfor
c ← p[size]
enddef

def WideMultiplyMatrix(major,op,gsize,rd,rc,rb)
d ← RegRead(rd, 128)
c ← RegRead(rc, 64)
b ← RegRead(rb, 128)
lgsize ← log(gsize)

if c_{lgsize-4..0} ≠ 0 then

raise AccessDisallowedByVirtualAddress

endif

if c_{3..lgsize-3} ≠ 0 then
wsize ← (c and (0-c)) || 0^{4}
t ← c and (c-1)

else

wsize ← 64
t ← c

endif

lwsize ← log(wsize)

if t_{lwsize+6-lgsize..lwsize-3} ≠ 0 then
msize ← (t and (0-t)) || 0^{4}
VirtAddr ← t and (t-1)

else

msize ← 128*wsize/gsize
VirtAddr ← t

endif

case major of
W.MINOR.B:

order ← B
W.MINOR.L:
order ← L

endcase 

FIG. 14D-1 
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case op of 

W.MUL.MAT.U.8, W.MUL.MAT.U.16, W.MUL.MAT.U.32,
W.MUL.MAT.U.64:

ms ← bs ← 0
W.MUL.MAT.M.8, W.MUL.MAT.M.16, W.MUL.MAT.M.32,
W.MUL.MAT.M.64:

ms ← 0

bs ← 1

W.MUL.MAT.8, W.MUL.MAT.16, W.MUL.MAT.32,
W.MUL.MAT.64, W.MUL.MAT.C.8, W.MUL.MAT.C.16,
W.MUL.MAT.C.32, W.MUL.MAT.C.64:

ms ← bs ← 1
W.MUL.MAT.P.8, W.MUL.MAT.P.16, W.MUL.MAT.P.32,
W.MUL.MAT.P.64:
endcase

m ← LoadMemory(c,VirtAddr,msize,order)
h ← 2*gsize

for i ← 0 to wsize-gsize by gsize
q[0] ← 0^{2*gsize}

for j-*-0 to vsize-gsize by gsize 
case op of 

W.MUL.MAT.P.8, W.MUL.MAT.P.16,
W.MUL.MAT.P.32, W.MUL.MAT.P.64:
k ← i+wsize*j_{8..lgsize}

q[j+gsize] ← q[j] ^ PolyMultiply(gsize,m_{k+gsize-1..k},

b_{j+gsize-1..j})
W.MUL.MAT.C.8, W.MUL.MAT.C.16, W.MUL.MAT.C.32,

W.MUL.MAT.C.64:

if (~i) & j & gsize = 0 then

k ← i-(j&gsize)+wsize*j_{8..lgsize+1}
q[j+gsize] ← q[j] + mul(gsize,h,ms,m,k,bs,b,j)

else

k ← i+gsize+wsize*j_{8..lgsize+1}
q[j+gsize] ← q[j] - mul(gsize,h,ms,m,k,bs,b,j)

endif 
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W.MUL.MAT.8, W.MUL.MAT.16, W.MULMAT.32, 
W.MUL.MAT.64, W.MUL.MAT.M.8, W.MUL.MAT.M.16,
W.MUL.MAT.M.32, W.MUL.MAT.M.64, W.MUL.MAT.U.8,
W.MUL.MAT.U.16, W.MUL.MAT.U.32, W.MUL.MAT.U.64:
q[j+gsize] ← q[j] + mul(gsize,h,ms,m,i+wsize*

j_{8..lgsize},bs,b,j)

endfor

a_{2*gsize-1+2*i..2*i} ← q[vsize]

endfor

a_{127..2*wsize} ← 0
RegWrite(rd, 128, a) 

enddef 
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Exceptions 



Access disallowed by virtual address 
Access disallowed by tag 
Access disallowed by global TB 
Access disallowed by local TB 
Access detail required by tag 
Access detail required by local TB 
Access detail required by global TB 
Local TB miss 
Global TB miss 
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Operation codes 




W.MUL.MAT.X.B
W.MUL.MAT.X.L


Wide multiply matrix extract big-endian
Wide multiply matrix extract little-endian


Selection 




order 


class 

Multiply matrix extract 


W.MUL.MAT.X 


B L 



Format 

W.op.order ra=rc,rd,rb 

ra=wop(rc,rd,rb) 

3j 2423 1817 1211 

I W.o p.order I rd I rc j_ 
8 6 6 
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023 m[rc](128*128/size) 



, \extract /, , \extrac^ , , 



127 



rd(128) 




rb(32) 



128 ra(128) o 

Wide multiply matrix extract doublets 
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r 



511 rc(64*128/size) 

I 1 1 1 I 1 I I 1127 




128 ra(1 28) 0 

Wide multiply matrix extract complex doublets 
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Definition 10 
def mul(size,h,vs,v,i,ws,w,j) as

mul ← ((vs&v_{size-1+i})^{h-size} || v_{size-1+i..i}) * ((ws&w_{size-1+j})^{h-size} || w_{size-1+j..j})

enddef 

def WideMultiplyMatrixExtract(op,ra,rb,rc,rd)
d ← RegRead(rd, 128)
c ← RegRead(rc, 64)
b ← RegRead(rb, 128)
case b_{8..0} of
0..255:

sgsize ← 128
256..383:

sgsize ← 64
384..447:

sgsize ← 32
448..479:

sgsize ← 16
480..495:

sgsize ← 8
496..503:

sgsize ← 4
504..507:

sgsize ← 2
508..511:

sgsize ← 1

endcase

l ← b_{11}

m ← b_{12}

n ← b_{13}

signed ← b_{14}

if c_{3..0} ≠ 0 then

wsize ← (c and (0-c)) || 0^{4}

t ← c and (c-1)

else

wsize ← 128
t ← c

endif

if sgsize < 8 then

gsize ← 8
elseif sgsize > wsize/2 then

gsize ← wsize/2

else 

FIG. 15E-1 
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gsize~#-sgsize 

endif

lgsize ← log(gsize)
lwsize ← log(wsize)

if t_{lwsize+6-n-lgsize..lwsize-3} ≠ 0 then
msize ← (t and (0-t)) || 0^{4}
VirtAddr ← t and (t-1)

else

msize ← 64*(2-n)*wsize/gsize
VirtAddr ← t
endif

vsize ← (1+n)*msize*gsize/wsize

mm ← LoadMemory(c,VirtAddr,msize,order)

lmsize ← log(msize)

if (VirtAddr_{lmsize-4..0} ≠ 0) then

raise AccessDisallowedByVirtualAddress
endif

case op of

W.MUL.MAT.X.B:
order ← B
W.MUL.MAT.X.L:
order ← L

endcase
ms ← signed
ds ← signed ^ m
as ← signed or m
spos ← (b_{8..0}) and (2*gsize-1)
dpos ← (0 || b_{23..16}) and (gsize-1)
r ← spos

sfsize ← (0 || b_{31..24}) and (gsize-1)

tfsize ← (sfsize = 0) or ((sfsize+dpos) > gsize) ? gsize-dpos : sfsize
fsize ← (tfsize + spos > h) ? h - spos : tfsize
if (b_{10..9} = Z) & ~signed then
rnd ← F

else

rnd ← b_{10..9}
endif



FIG. 15E-2 
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for i -*-0 to wsize-gsize by gsize 
q[0] ← 0^{2*gsize+7-lgsize}
for j ← 0 to vsize-gsize by gsize
if n then

if (~i) & j & gsize = 0 then

k ← i-(j&gsize)+wsize*j_{8..lgsize+1}
q[j+gsize] ← q[j] + mul(gsize,h,ms,mm,k,ds,d,j)

else

k ← i+gsize+wsize*j_{8..lgsize+1}
q[j+gsize] ← q[j] - mul(gsize,h,ms,mm,k,ds,d,j)

endif

else

q[j+gsize] ← q[j] + mul(gsize,h,ms,mm,i+j*wsize/gsize,ds,d,j)

endif
endfor
p ← q[128]
case rnd of

none, N:

s ← 0^{h-r} || ~p_{r} || p_{r}^{r-1}

Z:

s ← 0^{h-r} || p_{h-1}^{r}

F:

s ← 0^{h}

C:

s ← 0^{h-r} || 1^{r}

endcase

v ← ((ds & p_{h-1}) || p) + (0 || s)

if (v_{h..r+fsize} = (as & v_{r+fsize-1})^{h+1-r-fsize}) or not l then

w ← (as & v_{r+fsize-1})^{gsize-fsize-dpos} || v_{fsize-1+r..r} || 0^{dpos}

else

w ← (as ? (v_{h} || ~v_{h}^{gsize-dpos-1}) : 1^{gsize-dpos}) || 0^{dpos}

endif

a_{gsize-1+i..i} ← w
endfor

a_{127..wsize} ← 0

enddef 
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Exceptions 



Access disallowed by virtual address 
Access disallowed by tag 
Access disallowed by global TB 
Access disallowed by local TB 
Access detail required by tag 
Access detail required by local TB 
Access detail required by global TB 
Local TB miss 
Global TB miss 
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Operation codes 



W.MULMAT.X.I.8.B 


Wide multiply matrix extract immediate signed byte big-endian 


W.MULMAT.X.I.8.L 


Wide multiply matrix extract immediate signed byte little-endian


W.MULMAT.X.I.16.B 


Wide multiply matrix extract immediate signed doublet big-endian 


W.MULMAT.X.I.16.L 


Wide multiply matrix extract immediate signed doublet little-endian 


W.MUL.MAT.X.I.32.B


Wide multiply matrix extract immediate signed quadlet big-endian 


W.MUL.MAT.X.I.32.L 


Wide multiply matrix extract immediate signed quadlet little-endian 


W.MUL.MAT.X.I.64.B 


Wide multiply matrix extract immediate signed octlets big-endian 


W.MULMAT.X.I.64.L 


Wide multiply matrix extract immediate signed octlets little-endian 


W.MUL.MAT.X.I.C.8.B 


Wide multiply matrix extract immediate complex bytes big-endian 


W.MULMAT.X.I.C.8.L 


Wide multiply matrix extract immediate complex bytes little-endian 


W.MULMAT.X.I.C.16.B 


Wide multiply matrix extract immediate complex doublets big-endian 


W.MUL.MAT.X.I.C.16.L 


Wide multiply matrix extract immediate complex doublets little-endian 


W.MULMAT.X.I.C.32.B 


Wide multiply matrix extract immediate complex quadlets big-endian 


W.MUL.MAT.X.I.C.32.L 


Wide multiply matrix extract immediate complex quadlets little-endian 



Selection 



class 


op 


type 


size 


order 


wide multiply 
extract immediate 


W.MULMAT.X.I 


NONE 


8 16 32 64 


LB 


C 


8 16 32 


LB 



Format 

W.op.tsize.order rd=rc,rb, i 
rd=woptsizeorder(rc,rb,i) 
31 24 23 



18 17 



12 11 



6 5 4 32 0 
ihl 



W.op.order | rd 



rc 



rb 



sz 



8 



1 2 



sz ← log(size) - 3
assert size+3 > i > size-4
sh ← i - size
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023 m[rc](128*128/size) 
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^extract/ 


F \extr, 


ac/ 


r \gxtn 


ac/ v \extrc 




F 




\extrac/ 


\exUact/ 


\extract/ 


\extract 



rd(128) 



128 rd(128) 0 

Wide multiply matrix extract immediate doublets 
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Definition 

def mul(size,h,vs,v,i,ws,w,j) as

mul ← ((vs&v_{size-1+i})^{h-size} || v_{size-1+i..i}) * ((ws&w_{size-1+j})^{h-size} || w_{size-1+j..j})
enddef

def WideMultiplyMatrixExtractImmediate(op,type,gsize,rd,rc,rb,sh)
c ← RegRead(rc, 64)
b ← RegRead(rb, 128)
lgsize ← log(gsize)
case type of
NONE:

if c_{lgsize-4..0} ≠ 0 then

raise AccessDisallowedByVirtualAddress
endif

if c_{3..lgsize-3} ≠ 0 then
wsize ← (c and (0-c)) || 0^{4}

t ← c and (c-1)

else

wsize ← 128
t ← c
endif

lwsize ← log(wsize)

if t_{lwsize+6-lgsize..lwsize-3} ≠ 0 then
msize ← (t and (0-t)) || 0^{4}
VirtAddr ← t and (t-1)

else

msize ← 128*wsize/gsize
VirtAddr ← t

endif

vsize ← msize*gsize/wsize

C:

if c_{lgsize-4..0} ≠ 0 then

raise AccessDisallowedByVirtualAddress

endif

if c_{3..lgsize-3} ≠ 0 then

wsize ← (c and (0-c)) || 0^{4}
t ← c and (c-1)

else

wsize ← 128
t ← c
endif

lwsize ← log(wsize)

if t_{lwsize+5-lgsize..lwsize-3} ≠ 0 then
msize ← (t and (0-t)) || 0^{4}
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VirtAddM- t and (t-1) 
else

msize ← 64*wsize/gsize
VirtAddr ← t

endif

vsize ← 2*msize*gsize/wsize

endcase
case op of

W.MUL.MAT.X.I.B:

order ← B
W.MUL.MAT.X.I.L:
order ← L

endcase

as ← ms ← bs ← 1

m ← LoadMemory(c,VirtAddr,msize,order)
h ← (2*gsize) + 7 - lgsize - (ms and bs)
r ← gsize + (sh_{5}^{2} || sh)
for i ← 0 to wsize-gsize by gsize
q[0] ← 0^{2*gsize+7-lgsize}

for j ← 0 to vsize-gsize by gsize
case type of 
NONE: 

q[j+gsize] ← q[j] + mul(gsize,h,ms,m,i+wsize*
j_{8..lgsize},bs,b,j)

C:

if (~i) & j & gsize = 0 then

k ← i-(j&gsize)+wsize*j_{8..lgsize+1}
q[j+gsize] ← q[j] + mul(gsize,h,ms,m,k,bs,b,j)

else

k ← i+gsize+wsize*j_{8..lgsize+1}
q[j+gsize] ← q[j] - mul(gsize,h,ms,m,k,bs,b,j)

endif

endcase
endfor

p ← q[vsize]
s ← 0^{h-r} || ~p_{r} || p_{r}^{r-1}
v ← ((as & p_{h-1}) || p) + (0 || s)
if (v_{h..r+gsize} = (as & v_{r+gsize-1})^{h+1-r-gsize}) then
a_{gsize-1+i..i} ← v_{gsize-1+r..r}

else

a_{gsize-1+i..i} ← as ? (v_{h} || ~v_{h}^{gsize-1}) : 1^{gsize}

endif
endfor

a_{127..wsize} ← 0

RegWrite(rd, 128, a) rtn 4AH 1 

enddef FIG. 10U-d 



Exceptions 



Access disallowed by virtual address 
Access disallowed by tag 
Access disallowed by global TB 
Access disallowed by local TB 
Access detail required by tag 
Access detail required by local TB 
Access detail required by global TB 
Local TB miss 
Global TB miss 
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Operation codes 



W.MUL.MAT.C.F.16.B 


Wide multiply matrix complex floating-point half big-endian 


W.MUL.MAT.C.F.16.L


Wide multiply matrix complex floating-point half little-endian


W.MULMAT.C.F.32.B 


Wide multiply matrix complex floating-point single big-endian 


W.MUL.MAT.C.F.32.L 


Wide multiply matrix complex floating-point single little-endian 


W.MUL.MAT.F.16.B 


Wide multiply matrix floating-point half big-endian 


W.MULMAT.F.16.L 


Wide multiply matrix floating-point half little-endian 


W.MULMAT.F.32.B 


Wide multiply matrix floating-point single big-endian 


W.MUL.MAT.F.32.L 


Wide multiply matrix floating-point single little-endian 


W.MUL.MAT.F.64.B 


Wide multiply matrix floating-point double big-endian 


W.MULMAT.F.64.L 


Wide multiply matrix floating-point double little-endian 



Selection 



class 


op 


type 


prec 


order 


wide multiply matrix 


W.MUL.MAT 


F 


16 32 64 


LB 




C.F 


16 32 


LB 



Format 

W.op.prec.order rd=rc,rb 
rd=wopprecorder(rc,rb) 

3J 24 23 

W.MINOR.order | rd~ 



18 17 



12 11 



rc 



rb 



65 



21 0 
W.op | pr 



8 



pr ← log(prec) - 3



FIG. 17 A 
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c](128*128/size) 
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127 



rb(128) 



128 rd(128) 0 

Wide multiply matrix floating-point half 



FIG. 17B 
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Definition 

def mul(size,v,i,w,j) as

mul ← fmul(F(size,v_{size-1+i..i}),F(size,w_{size-1+j..j}))

enddef 



def WideMultiplyMatrixFloatingPoint(major,op,gsize,rd,rc,rb)
c ← RegRead(rc, 64)
b ← RegRead(rb, 128)
lgsize ← log(gsize)
switch op of

W.MUL.MAT.F.16, W.MUL.MAT.F.32, W.MUL.MAT.F.64:

if c_{lgsize-4..0} ≠ 0 then

raise AccessDisallowedByVirtualAddress

endif

if c_{3..lgsize-3} ≠ 0 then

wsize ← (c and (0-c)) || 0^{4}
t ← c and (c-1)

else

wsize ← 128
t ← c

endif

lwsize ← log(wsize)

if t_{lwsize+6-lgsize..lwsize-3} ≠ 0 then

msize ← (t and (0-t)) || 0^{4}

VirtAddr ← t and (t-1)

else

msize ← 128*wsize/gsize
VirtAddr ← t

endif

vsize ← msize*gsize/wsize
W.MUL.MAT.C.F.16, W.MUL.MAT.C.F.32, W.MUL.MAT.C.F.64:

if c_{lgsize-4..0} ≠ 0 then

raise AccessDisallowedByVirtualAddress

endif

if c_{3..lgsize-3} ≠ 0 then

wsize ← (c and (0-c)) || 0^{4}
t ← c and (c-1)

else

wsize ← 128
t ← c

endif

lwsize ← log(wsize)

if t_{lwsize+5-lgsize..lwsize-3} ≠ 0 then

FIG. 17D-1 



>f — 1780 

msize ← (t and (0-t)) || 0^{4}
VirtAddr ← t and (t-1)

else

msize ← 64*wsize/gsize
VirtAddr ← t

endif

vsize ← 2*msize*gsize/wsize

endcase
case major of
W.MINOR.B:

order ← B
W.MINOR.L:
order ← L

endcase

m ← LoadMemory(c,VirtAddr,msize,order)
for i ← 0 to wsize-gsize by gsize
q[0].t ← NULL

for j ← 0 to vsize-gsize by gsize
case op of

W.MUL.MAT.F.16, W.MUL.MAT.F.32, W.MUL.MAT.F.64:
q[j+gsize] ← fadd(q[j], mul(gsize,m,i+wsize*j_{8..lgsize},b,j))

W.MUL.MAT.C.F.16, W.MUL.MAT.C.F.32,

W.MUL.MAT.C.F.64:

if (~i) & j & gsize = 0 then

k ← i-(j&gsize)+wsize*j_{8..lgsize+1}
q[j+gsize] ← fadd(q[j], mul(gsize,m,k,b,j))

else

k ← i+gsize+wsize*j_{8..lgsize+1}
q[j+gsize] ← fsub(q[j], mul(gsize,m,k,b,j))

endif

endcase
endfor

a_{gsize-1+i..i} ← q[vsize]
endfor

a_{127..wsize} ← 0

RegWrite(rd, 128, a) 
enddef 



FIG. 17D-2 



Exceptions 

Floating-point arithmetic 
Access disallowed by virtual address 
Access disallowed by tag 
Access disallowed by global TB 
Access disallowed by local TB 
Access detail required by tag 
Access detail required by local TB 
Access detail required by global TB 
Local TB miss 
Global TB miss 
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Operation codes 



W.MUL.MAT.G.8.B 


Wide multiply matrix Galois bytes big-endian 


W.MUL.MAT.G.8.L 


Wide multiply matrix Galois bytes little-endian 



Selection 



class 


OD 


size 


order 


Multiply matrix Galois 


W.MUL.MAT.G 


8 


B L 



Format 

W.op.order ra=rc,rd,rb 
ra=woporder(rc,rd,rb) 

31 24 23 18 17 12 11 6 5 0 

I W.oo .order I rd I re I rb I ra J 
1 % 6 6 6 6 



FIG. 18A 
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2047 



mfrcl 



128'128/size) 




127 



rd(128) 



ra(128) 

Wide multiply matrix Galois byte 



FIG. 18B 



Definition 

def c ← PolyMultiply(size,a,b) as

p[0] ← 0^{2*size}

for k ← 0 to size-1

p[k+1] ← p[k] ^ a_{k} ? (0^{size-k} || b || 0^{k}) : 0^{2*size}

endfor

c ← p[size]
enddef 

def c ← PolyResidue(size,a,b) as
p[0] ← a

for k ← size-1 to 0 by -1

P(k-1]— p[k] * p[0] si2e+k ?(0 size - k || b|| 0 k ) : 0 2 * 
endfor 

c ← p[size]_{size-1..0}
enddef 

def WideMultiplyMatrixGalois(op,gsize,rd,rc,rb,ra)
d ← RegRead(rd, 128)
c ← RegRead(rc, 64)
b ← RegRead(rb, 128)
lgsize ← log(gsize)
if c_{lgsize-4..0} ≠ 0 then

raise AccessDisallowedByVirtualAddress

endif

if c_{3..lgsize-3} ≠ 0 then

wsize ← (c and (0-c)) || 0^{4}
t ← c and (c-1)

else

wsize ← 128
t ← c

endif

lwsize ← log(wsize)

if t_{lwsize+6-lgsize..lwsize-3} ≠ 0 then

msize ← (t and (0-t)) || 0^{4}

VirtAddr ← t and (t-1)

else

msize ← 128*wsize/gsize
VirtAddr ← t

endif

case op of

W.MUL.MAT.G.8.B:

order ← B
W.MUL.MAT.G.8.L:
order ← L
endcase f/ G> 



r 



1860 



m ← LoadMemory(c,VirtAddr,msize,order)
for i ← 0 to wsize-gsize by gsize
q[0] ← 0^{2*gsize}

for j ← 0 to vsize-gsize by gsize
k ← i+wsize*j_{8..lgsize}

q[j+gsize] ← q[j] ^ PolyMultiply(gsize,m_{k+gsize-1..k},d_{j+gsize-1..j})
endfor

a_{gsize-1+i..i} ← PolyResidue(gsize,q[vsize],b_{gsize-1..0})
endfor

a_{127..wsize} ← 0
RegWrite(ra,128, a) 

enddef 



FIG. 18C-2 



Exceptions 

Access disallowed by virtual address 
Access disallowed by tag 
Access disallowed by global TB 
Access disallowed by local TB 
Access detail required by tag 
Access detail required by local TB 
Access detail required by global TB 
Local TB miss 
Global TB miss 
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Operation codes 


E.MUL.ADD.X 


Ensemble multiply add extract 


E.CON.X 


Ensemble convolve extract 


Format 




E.op rd@rc,rb,ra 




rd=gop(rd,rc,rb,ra) 




31 24 


23 18 17 12 11 6 5 0 


I E.op 


| rd rc rb I ra l 


8 


6 6 6 6 



FIG. 19 A 
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Figures 19B and 20B has blank fields: should be. 



| dpos | x | s | n | m | l | rnd | gssp |



fsize 



FIG. 19B 
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\extract/ 
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, \extrad 



127 



rb(1 28) 



r \extrac^ jSpxtracj / , t 



' \extrac/ j \extrac/ J Xextrac/ ' \extrac/ 
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iii 

128 rd(128) 0 

Ensemble multiply add extract doublets 
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v 

jxtra 
a\ 




\extra 




vpxtra 




v extrac 






\extrs 
a 




\extra 




^extract/ 1 \ 


extract 

T 



I l l I I I I 11 



128 rd(128) 0 

Ensemble complex multiply add extract doublets 

The ensemble-multiply-add-extract instructions (E.MUL.ADD.X), when
the x bit is set, multiply the low-order 64 bits of each of the rc and rb
registers and produce extended (double-size) results.
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\extra<V \extracV \extiacj/ \extrac/ 



\extract/ \ F \extrac/ , , \extrac/ , p \extracy /, } 



T 



128 



rd(128) 0 

Ensemble convolve extract doublets 



(128) 




FIG. 19E 




I I I I I I I I I 
128 rd(128) 0 

Ensemble convolve extract complex doublets
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Definition 

def mul(size,h,vs,v,i,ws,w,j) as

mul ← ((vs&v_{size-1+i})^{h-size} || v_{size-1+i..i}) * ((ws&w_{size-1+j})^{h-size} || w_{size-1+j..j})
enddef 

def EnsembleExtractInplace(op,ra,rb,rc,rd) as
d ← RegRead(rd, 128)
c ← RegRead(rc, 128)
b ← RegRead(rb, 128)
case b_{8..0} of
0..255:

sgsize ← 128
256..383:

sgsize ← 64
384..447:

sgsize ← 32
448..479:

sgsize ← 16
480..495:

sgsize ← 8
496..503:

sgsize ← 4
504..507:

sgsize ← 2
508..511:

sgsize ← 1

endcase

l ← a_{11}

m ← a_{12}

n ← a_{13}

signed ← a_{14}

x ← a_{15}

case op of

E.CON.X:

if (sgsize < 8) then

gsize ← 8
elseif (sgsize*(n+1)*(x+1) > 128) then
gsize ← 128/(n+1)/(x+1)

else

gsize ← sgsize

endif

lgsize ← log(gsize)
wsize ← 128/(x+1)



FIG. 19G-1 



vsize ← 128
ds ← cs ← signed
bs ← signed ^ m
zs ← signed or m or n

h ← (2*gsize) + log(vsize) - lgsize
spos ← (a_{8..0}) and (2*gsize-1)

E.MUL.ADD.X:

if (sgsize < 8) then

gsize ← 8
elseif (sgsize*(n+1)*(x+1) > 128) then

gsize ← 128/(n+1)/(x+1)

else

gsize ← sgsize
endif

ds ← signed
cs ← signed ^ m
zs ← signed or m or n
zsize ← gsize*(x+1)
h ← (2*gsize) + n
spos ← (a_{8..0}) and (2*gsize-1)
endcase

dpos ← (0 || a_{23..16}) and (zsize-1)
r ← spos

sfsize ← (0 || a_{31..24}) and (zsize-1)
tfsize ← (sfsize = 0) or ((sfsize+dpos) > zsize) ? zsize-dpos : sfsize
fsize ← (tfsize + spos > h) ? h - spos : tfsize
if (b_{10..9} = Z) and not as then
rnd ← F

else

rnd ← b_{10..9}
endif




FIG. 19G-2 



1990 

for k ← 0 to wsize-zsize by zsize
i ← k*gsize/zsize
case op of
E.CON.X:
q[0] ← 0

for j ← 0 to vsize-gsize by gsize
if n then

if (~i) & j & gsize = 0 then

q[j+gsize] ← q[j] + mul(gsize,h,ms,m,i+
128-j,bs,b,j)

else

q[j+gsize] ← q[j] - mul(gsize,h,ms,m,i+
128-j+2*gsize,bs,b,j)
endif

else

q[j+gsize] ← q[j] + mul(gsize,h,ms,m,i+
128-j,bs,b,j)

endif
endfor

p ← q[vsize]
E.MUL.ADD.X:

di ← ((ds and d_{k+zsize-1})^{h-zsize-r} || (d_{k+zsize-1..k}) || 0^{r})
if n then

if (i and gsize) = 0 then

p ← mul(gsize,h,ds,d,i,cs,c,i) -
mul(gsize,h,ds,d,i+gsize,cs,c,i+gsize) + di

else 

p^muKgsize.h.ds.dj^s.cj^sizeJ^uKgsize.h^ds^^cs.c/i^size)^ 
endif 

else 

p^muKgsize.h.ds^j.cs.ci) +di 

endif 

endcase 
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case rnd of 

N:

s ← 0^{h-r} || ~p_{r} || p_{r}^{r-1}

Z:

s ← 0^{h-r} || p_{h-1}^{r}

F:
s ← 0^{h}

C:

s ← 0^{h-r} || 1^{r}

endcase

v ← ((zs & p_{h-1}) || p) + (0 || s)

if (v_{h..r+fsize} = (zs & v_{r+fsize-1})^{h+1-r-fsize}) or not (l and (op =
EXTRACT)) then

w ← (zs & v_{r+fsize-1})^{zsize-fsize-dpos} || v_{fsize-1+r..r} || 0^{dpos}

else

w ← (zs ? (v_{h} || ~v_{h}^{zsize-dpos-1}) : 1^{zsize-dpos}) || 0^{dpos}
endif

z_{zsize-1+k..k} ← w
endfor 

RegWrite(rd, 128, z) 
enddef 



FIG. 19G-4 
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Operation codes 






E.MUL.X


Ensemble multiply extract 


E.EXTRACT


Ensemble extract 


E.SCAL.ADD.X 


Ensemble scale add extract


Format 






E.op ra=rd,rc,rb 






ra=eop(rd,rc,rb) 






31 24 


23 18 17 12 11 


6 5 0 


I E.op 


rd rc rb 


I ra I 


8 


6 6 6 


6 



FIG. 20A 
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Figures 19B and 20B has blank fields: should be. 
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fsize 



I dpos |x|s|n|m| Hrnd| gisp 
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rd(128) 




fixtract/|^xt rac£7 \extoct/ .;Wacj7 ir 




127 



rc(128) 



\extract/ \extfact Z ^xlracj7 \extrac"7 

128 ra(128) o 

Ensemble complex multiply extract doublets 

The ensemble-multiply-extract instructions (E.MUL.X), when

the x bit is set, multiply the low-order 64 bits of each of the rc and rb

registers and produce extended (double-size) results.
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I I I ~ I 

^xtr act/|\^tfact7 . , \extract/ , , \extract/ 
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128 ra(128) o 

Ensemble scale add extract doublets 
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0 >"b(128) 
95 



ra(128) 0 

Ensemble complex scale add extract doublets 

The ensemble-scale-add-extract instructions (E.SCAL.ADD.X), when the x bit
is set, multiply the low-order 64 bits of each of the rd and rc registers by the
rb register fields and produce extended (double-size) results.
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Definition 2090 
def mul(size,h,vs,v,i,ws,w,j) as



mul ← ((vs&v_{size-1+i})^{h-size} || v_{size-1+i..i}) * ((ws&w_{size-1+j})^{h-size} || w_{size-1+j..j})



enddef 

def EnsembleExtract(op,ra,rb,rc,rd) as
    d ← RegRead(rd, 128)
    c ← RegRead(rc, 128)
    b ← RegRead(rb, 128)
    case b_{8..0} of
        0..255:
            sgsize ← 128
        256..383:
            sgsize ← 64
        384..447:
            sgsize ← 32
        448..479:
            sgsize ← 16
        480..495:
            sgsize ← 8
        496..503:
            sgsize ← 4
        504..507:
            sgsize ← 2
        508..511:
            sgsize ← 1
    endcase
    l ← b_{11}
    m ← b_{12}
    n ← b_{13}
    signed ← b_{14}
    x ← b_{15}
    case op of
        E.EXTRACT:
            gsize ← sgsize*2^{(2-(m or x))}
            zsize ← sgsize
            h ← gsize
            as ← signed
            spos ← (b_{8..0}) and (gsize-1)



FIG. 20J-1 



        E.SCAL.ADD.X:
            if (sgsize < 8) then
                gsize ← 8
            elseif (sgsize*(n+1) > 32) then
                gsize ← 32/(n+1)
            else
                gsize ← sgsize
            endif
            ds ← cs ← signed
            bs ← signed ^ m
            as ← signed or m or n
            zsize ← gsize*(x+1)
            h ← (2*gsize) + 1 + n
            spos ← (b_{8..0}) and (2*gsize-1)
        E.MUL.X:
            if (sgsize < 8) then
                gsize ← 8
            elseif (sgsize*(n+1)*(x+1) > 128) then
                gsize ← 128/(n+1)/(x+1)
            else
                gsize ← sgsize
            endif
            ds ← signed
            cs ← signed ^ m
            as ← signed or m or n
            zsize ← gsize*(x+1)
            h ← (2*gsize) + n
            spos ← (b_{8..0}) and (2*gsize-1)
    endcase
    dpos ← (0 || b_{23..16}) and (zsize-1)
    r ← spos
    sfsize ← (0 || b_{31..24}) and (zsize-1)
    tfsize ← (sfsize = 0) or ((sfsize+dpos) > zsize) ? zsize-dpos : sfsize
    fsize ← (tfsize + spos > h) ? h - spos : tfsize
    if (b_{10..9} = Z) and not as then
        rnd ← F
    else
        rnd ← b_{10..9}
    endif



FIG. 20J-2 



    for j ← 0 to 128-zsize by zsize
        i ← j*gsize/zsize
        case op of
            E.EXTRACT:
                if m or x then
                    p ← d_{gsize+i-1..i}
                else
                    p ← (d || c)_{gsize+i-1..i}
                endif
            E.MUL.X:
                if n then
                    if (i and gsize) = 0 then
                        p ← mul(gsize,h,ds,d,i,cs,c,i)
                            - mul(gsize,h,ds,d,i+gsize,cs,c,i+gsize)
                    else
                        p ← mul(gsize,h,ds,d,i,cs,c,i+gsize) + mul(gsize,h,ds,d,i,cs,c,i+gsize)
                    endif
                else
                    p ← mul(gsize,h,ds,d,i,cs,c,i)
                endif
            E.SCAL.ADD.X:
                if n then
                    if (i and gsize) = 0 then
                        p ← mul(gsize,h,ds,d,i,bs,b,64+2*gsize)
                            + mul(gsize,h,cs,c,i,bs,b,64)
                            - mul(gsize,h,ds,d,i+gsize,bs,b,64+3*gsize)
                            - mul(gsize,h,cs,c,i+gsize,bs,b,64+gsize)
                    else
                        p ← mul(gsize,h,ds,d,i,bs,b,64+3*gsize)
                            + mul(gsize,h,cs,c,i,bs,b,64+gsize)
                            + mul(gsize,h,ds,d,i+gsize,bs,b,64+2*gsize)
                            + mul(gsize,h,cs,c,i+gsize,bs,b,64)
                    endif
                else
                    p ← mul(gsize,h,ds,d,i,bs,b,64+gsize) + mul(gsize,h,cs,c,i,bs,b,64)
                endif
        endcase



FIG. 20J-3 



case rnd of ^-2090 
s^0 h -Mhp r || pM 

C: 

s^-0 h - r || 1 r 

endcase 

        v ← ((as & p_{h-1}) || p) + (0 || s)
        if (v_{h..r+fsize} = (as & v_{r+fsize-1})^{h+1-r-fsize}) or not (l and (op = E.EXTRACT)) then
            w ← (as & v_{r+fsize-1})^{zsize-fsize-dpos} || v_{fsize-1+r..r} || 0^{dpos}
        else
            w ← (as ? (v_{h} || ~v_{h}^{zsize-dpos-1}) : 1^{zsize-dpos}) || 0^{dpos}
        endif
        if m and (op = E.EXTRACT) then
            z_{zsize-1+j..j} ← c_{zsize-1+j..dpos+fsize+j} || w_{dpos+fsize-1..dpos} || c_{dpos-1+j..j}
        else
            z_{zsize-1+j..j} ← w
        endif
    endfor
    RegWrite(ra, 128, z)
enddef
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data 



Gateway with pointers to code and data spaces 



FIG. 21 A 
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Typical dynamic-linked, inter 
caller: 



-gateway calling sequence: 



caller AA.DDI 
S.I.64.A 
S.I.64.A 

L.I.64.A 
LI.64.A 
B.GATE 
LI.64.A 

..(code using dp) 
LI.64.A 

A. ADDI 
B 

callee (non-leaf): 

callee: L.I.64.A 
S.I.64.A 
L.I.64.A 
S.I.64.A 
S.I.64.A 
...(using dp) 
LI.64.A 

...(code using dp) 

LI.64.A 

LI.64.A 

B. DOWN 

callee (leaf, no stack): 

callee: ...(using dp) 
B.DOWN 



sp@-size 

Ip.sp.off 

dp.sp.off 

lp=dp,off 
dp=dp,off 

dp.sp.off 

lp=sp,off 

sp=size 

IP 



dp=dp,off 

sp.dp.off 

sp=dp,off 

Ip.sp.off 

dp.sp.off 

dp.sp.off 

lp=sp,off 
sp=sp,off 
IP 



// allocate caller stack frame 



// load lp 
// load dp 



// restore original lp register 
// deallocate caller stack frame 
// return 



// load dp with data pointer 
// new stack pointer 



// restore original lp register 
// restore original sp register 



lp 



FIG. 21B 
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Operation codes 



B.GATE I 


Branch gateway | 


Equivalencies 


B.GATE 


B.GATE 0 | 


Format 




B.GATE rb 




bgate(rb) • 




31 24 


23 18 17 12 11 6 5 0 


| B. MINOR 


U 1 I rb | R ftATC | 


8 


6 6 6 6 




FIG. 21 C 
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FIG. 21 D 



Definition 



def BranchGateway(rd,rc,rb) as
    c ← RegRead(rc, 64)
    b ← RegRead(rb, 64)
    if (rd ≠ 0) or (rc ≠ 1) then
        raise ReservedInstruction
    endif
    if c_{2..0} ≠ 0 then
        raise AccessDisallowedByVirtualAddress
    endif
    d ← ProgramCounter_{63..2}+1 || PrivilegeLevel
    if PrivilegeLevel < b_{1..0} then
        m ← LoadMemoryG(c,c,64,L)
        if b ≠ m then
            raise GatewayDisallowed
        endif
        PrivilegeLevel ← b_{1..0}
    endif
    ProgramCounter ← b_{63..2} || 0^{2}
    RegWrite(rd, 64, d)
    raise TakenBranch
enddef
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Exceptions 



Reserved Instruction 
Gateway disallowed 
Access disallowed by virtual address 
Access disallowed by tag 
Access disallowed by global TB 
Access disallowed by local TB 
Access detail required by tag 
Access detail required by local TB 
Access detail required by global TB 
Local TB miss 
Global TB miss 



FIG. 21F 
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Operation codes 




Selection 



scale add 



ESCALADD.F 



prec 



16 32 64 



Format 

E.op.prec ra=rd,rc,rb 

ra=eopprec(rd,rc f rb) 

31 24 23 



18 17 



12 11 



E.op.prec 

8 



rd 

6 



6 5 



rc 



rb 



ra 

6 



FIG. 22A 
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Definition 

def EnsembleFloatingPointTernary(op,prec,rd,rc,rb,ra) as
    d ← RegRead(rd, 128)
    c ← RegRead(rc, 128)
    b ← RegRead(rb, 128)
    for i ← 0 to 128-prec by prec
        di ← F(prec, d_{i+prec-1..i})
        ci ← F(prec, c_{i+prec-1..i})
        ai ← fadd(fmul(di, F(prec, b_{prec-1..0})), fmul(ci, F(prec, b_{2*prec-1..prec})))
        a_{i+prec-1..i} ← PackF(prec, ai, none)
    endfor
    RegWrite(ra, 128, a)
enddef



FIG. 22B 
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Operation codes 




Selection 



operation 
d 



function (binary} 



function (decimal) 



11110000 
11001100 



240 



204 



10101010 
10000000 



176 



d&c&b 



(d&c)[b 



d|c|b 



d?c:b 
d A c A b 



11101010 



11111110 



11001010 



128 



234 



254 
202 



10010110 
01101001 



150 



-d A c A b 



00000000 



105 



Format 

G. BOOLEAN rd@trc,trb,f 
rd=gbooleani(rd,rc,rb,f) 

31 25 2423 



18 17 



12 11 



6 5 



•BOOLEAN |ih| rd 
7 ' " 



rc 



rb 



1 



FIG. 23A 
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if fe^sthan 

    if f2=f1 then
        if f2 then
            rc ← max(trc,trb)
            rb ← min(trc,trb)
        else
            rc ← min(trc,trb)
            rb ← max(trc,trb)
        endif
        ih ← 0
        il ← 0 || f6 || f7 || f4 || f3 || f0
    else
        if f2 then
            rc ← trb
            rb ← trc
        else
            rc ← trc
            rb ← trb
        endif
        ih ← 0
        il ← 1 || f6 || f7 || f4 || f3 || f0
    endif
else
    ih ← 1
    if f6 then
        rc ← trb
        rb ← trc
        il ← f1 || f2 || f7 || f4 || f3 || f0
    else
        rc ← trc
        rb ← trb
        il ← f2 || f1 || f7 || f4 || f3 || f0
    endif
endif
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Definition 

def GroupBoolean(ih,rd,rc,rb,il)
    d ← RegRead(rd, 128)
    c ← RegRead(rc, 128)
    b ← RegRead(rb, 128)
    if ih=0 then
        if il_{5}=0 then
            f ← il_{3} || il_{4} || il_{4} || il_{2} || il_{1} || (rc>rb)^{2} || il_{0}
        else
            f ← il_{3} || il_{4} || il_{4} || il_{2} || il_{1} || 0 || 1 || il_{0}
        endif
    else
        f ← il_{3} || 0 || 1 || il_{2} || il_{1} || il_{5} || il_{4} || il_{0}
    endif
    for i ← 0 to 127 by size
        a_{i} ← f_{(d_{i} || c_{i} || b_{i})}
    endfor
    RegWrite(rd, 128, a)
enddef
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Operation codes 

I B.H1NT | Branch Hint 



Format 

B.HINT badd.count.rd 
bhint(badd.count,rd) 

24 23 1817 1211 65 0 

I B.MINOR | rd I count T simm 1 B.H1KIT I 

8 ~ 6 6~ ~6~ 

simm <— badd-pc-4 



FIG. 24A 
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Definition 

def BranchHint(rd,count,simm) as
    d ← RegRead(rd, 64)
    if (d_{1..0}) ≠ 0 then
        raise AccessDisallowedByVirtualAddress
    endif
    FetchHint(ProgramCounter + 4 + (0 || simm || 0^{2}), d_{63..2} || 0^{2}, count)
enddef



F/6. 24B 



I 



Exceptions 

Access disallowed by virtual address 



FIG. 24C 



r 
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Operation codes 



E.SINK.F.16 



E.SINK. P.lbC 



Ensemble conve rt floating-po 



Ensemble convert floatinq>po 



doublets from half nearest default 



doublets from half ceiling 



E.SINK.F.16.C.D 
.F.16.F 



Ensemble 
Ensemble 



convert floating-po 



doublets from half ceiling default 



E.SINK. 
E.SINK.F.16.F.D 



convert floating-po 



doublets from half floor 



Ensemble convert floating-po 



doublets from half floor default 



E.SINK.F.16.N 
F.16.X 



Ensemble 
Ensemble 



convert floating-po 



doublets from half nearest 



E.SINK. 
E.SINK, 



convert floating-po 



doublets from half exact 



F.16.Z 



Ensemble convert floating-po 



doublets from half zero 



E.SINK. 



F.16.Z.D 
F.32 



Ensemble 
Ensemble 



convert floating-poi 



doublets from half zero default 



E.SINK. 
E.SINK 



convert floating-poi 



quadlets from single nearest default 



F.32.C 



E.SINK. 
E.SINK. 



F.32.C.D 
F.32.F 



Ensemble convert floating-po 



Ensemble 
Ensemble 



convert floatinq-po 



quadlets from single ceiling 



quadlets from single ceiling default 



E.SINK.F.32.F.D 



convert floating-po 



E.SINK. 
E.SINK. 



F.32.N 
F.32.X 



Ensemble convert floating-po 



quadlets from single floor 



Ensemble convert floating-po 



quadlets from single floor default 



quadlets from single nearest 



Ensemble 
Ensemble 



convert floating-po 



quadlets from single exact 



E.SINK.F.32.Z 
F.32.Z.D 



convert floating-po 



quadlets from single zero 



E.SINK 
E.SINK 



Ensemble convert floating-po 



quadlets from single zero default 



F.64 



Ensemble 
Ensemble 



convert floating-po 



octlets from double nearest default 



E.SINK, 



F.64.C 
F.64.C.D 



convert floating-po 



octlets from double ceiling 



E.SINK 
E.SINK 



Ensemble convert floating-po 



octlets from double ceiling default 



F.64.F 



Ensemble 
Ensemble 



convert floating-po 



octlets from double floor 



E.SINK.F.64.F.D 
F.64.N 



convert floating-po 



octlets from double floor default 



E.SINK 
E.SINK 



Ensemble 
Ensemble 



convert floating-po 



octlets from double nearest 



F.64.X 



convert floating-po 



octlets from double exact 



E.SINK. 
E.SINK. 



F.64.Z 
F.64.Z.D 



Ensemble convert floating-po 



octlets from double zero 



Ensemble 
Ensemble 



convert floating-po 



octlets from double zero default 



E.SINK.F.128 



convert floating-po 



hexlet from quad nearest default 



E.SINK, 
E.SINK. 



F.128.C 
F.128.C.P 



Ensemble convert floating-poi 



hexlet from quad ceiling 



Ensemble 
Ensemble 



convert floating-po 



hexlet from quad ceiling default 



E.S1NK.F.128.F 



E.SINK, 
E.SINK. 



F.128.F.D 
F.128.N 



convert floating-po 



Ensemble convert floating-po 



hexlet from quad floor 



hexlet from quad floor default 



Ensemble 
Ensemble 



convert floating-po 



hexlet from quad nearest 



E.SINK. 
E.SINK. 



F.128.X 
F.128.Z 



convert floating-po 



hexlet from quad exact 



Ensemble 
Ensemble 



convert floating-po 



hexlet from quad zero 



E.SINK.F.128.Z.D 



convert floating-poin 



hexlet from quad zero default 



FIG. 25A-1 
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Selection 



integer from float 


op 

SINK 


prec 

r i6 32 64 128 


round/trap 

noneCFNXZCD 
F.DZ.D 


Format 










E.SINK.F.prec.rnd rd=rc 










rd=esinkfprecrnd(rc) 

31 24 23 


ii 


I 17 


12 il 


6 5 n 


1 E.prec I 


rd 


i 


rc lE.SINKJF.rndl E.IFNabv 1 


8 


6 




6 6 


6 



FIG. 25A-2 
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Definition 

def EnsembleSinkFloatingPoint(prec,round,rd,rc) as
    c ← RegRead(rc, 128)
    for i ← 0 to 128-prec by prec
        ci ← F(prec, c_{i+prec-1..i})
        a_{i+prec-1..i} ← fsinkr(prec, ci, round)
    endfor
    RegWrite(rd, 128, a)
enddef



FIG. 25B 



Exceptions 

Floating-point arithmetic 



FIG. 25C 



Definition 

def eb ← ebits(prec) as
    case prec of
        16:
            eb ← 5
        32:
            eb ← 8
        64:
            eb ← 11
        128:
            eb ← 15
    endcase
enddef

def eb ← ebias(prec) as
    eb ← 0 || 1^{ebits(prec)-1}
enddef

def fb ← fbits(prec) as
    fb ← prec - 1 - eb
enddef

def a ← F(prec, ai) as
    a.s ← ai_{prec-1}
    ae ← ai_{prec-2..fbits(prec)}
    af ← ai_{fbits(prec)-1..0}
    if ae = 1^{ebits(prec)} then
        if af = 0 then
            a.t ← INFINITY
        elseif af_{fbits(prec)-1} then
            a.t ← SNaN
            a.e ← -fbits(prec)
            a.f ← 1 || af_{fbits(prec)-1..0}
        else
            a.t ← QNaN
            a.e ← -fbits(prec)
            a.f ← af
        endif
    elseif ae = 0 then
        if af = 0 then
            a.t ← ZERO



FIG. 25D-1 



        else
            a.t ← NORM
            a.e ← 1-ebias(prec)-fbits(prec)
            a.f ← 0 || af
        endif
    else
        a.t ← NORM
        a.e ← ae-ebias(prec)-fbits(prec)
        a.f ← 1 || af
    endif
enddef

def a ← DEFAULTQNAN as
    a.s ← 0
    a.t ← QNAN
    a.e ← -1
    a.f ← 1
enddef

def a ← DEFAULTSNAN as
    a.s ← 0
    a.t ← SNAN
    a.e ← -1
    a.f ← 1
enddef



FIG. 25D-2 
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def fadd(a.b) as faddr(a,b,N) endder 

def c ← faddr(a,b,round) as
    if a.t=NORM and b.t=NORM then
        // d,e are a,b with exponent aligned and fraction adjusted
        if a.e > b.e then
            d ← a
            e.t ← b.t
            e.s ← b.s
            e.e ← a.e
            e.f ← b.f || 0^{a.e-b.e}
        else if a.e < b.e then
            d.t ← a.t
            d.s ← a.s
            d.e ← b.e
            d.f ← a.f || 0^{b.e-a.e}
            e ← b
        endif
        c.t ← d.t
        c.e ← d.e
        if d.s = e.s then
            c.s ← d.s
            c.f ← d.f + e.f
        elseif d.f > e.f then
            c.s ← d.s
            c.f ← d.f - e.f
        elseif d.f < e.f then
            c.s ← e.s
            c.f ← e.f - d.f
        else
            c.s ← r=F
            c.t ← ZERO
        endif



FIG. 25D-3 
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// priority is given to be operand for NaN propagation " 
    elseif (b.t=SNAN) or (b.t=QNAN) then
        c ← b
    elseif (a.t=SNAN) or (a.t=QNAN) then
        c ← a
    elseif a.t=ZERO and b.t=ZERO then
        c.t ← ZERO
        c.s ← (a.s and b.s) or (round=F and (a.s or b.s))
    // NULL values are like zero, but do not combine with ZERO to alter sign
    elseif a.t=ZERO or a.t=NULL then
        c ← b
    elseif b.t=ZERO or b.t=NULL then
        c ← a
    elseif a.t=INFINITY and b.t=INFINITY then
        if a.s ≠ b.s then
            c ← DEFAULTSNAN // Invalid
        else
            c ← a
        endif
    elseif a.t=INFINITY then
        c ← a
    elseif b.t=INFINITY then
        c ← b
    else
        assert FALSE // should have covered all the cases above
    endif
enddef

def b ← fneg(a) as
    b.s ← ~a.s
    b.t ← a.t
    b.e ← a.e
    b.f ← a.f
enddef

def fsub(a,b) as fsubr(a,b,N) enddef

def fsubr(a,b,round) as faddr(a,fneg(b),round) enddef

def frsub(a,b) as frsubr(a,b,N) enddef

def frsubr(a,b,round) as faddr(fneg(a),b,round) enddef
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def c-^- fcom(a.b) as 

    if (a.t=SNAN) or (a.t=QNAN) or (b.t=SNAN) or (b.t=QNAN) then
        c ← U
    elseif a.t=INFINITY and b.t=INFINITY then
        if a.s ≠ b.s then
            c ← (a.s=0) ? G : L
        else
            c ← E
        endif
    elseif a.t=INFINITY then
        c ← (a.s=0) ? G : L
    elseif b.t=INFINITY then
        c ← (b.s=0) ? G : L
    elseif a.t=NORM and b.t=NORM then
        if a.s ≠ b.s then
            c ← (a.s=0) ? G : L
        else
            if a.e > b.e then
                af ← a.f
                bf ← b.f || 0^{a.e-b.e}
            else
                af ← a.f || 0^{b.e-a.e}
                bf ← b.f
            endif
            if af = bf then
                c ← E
            else
                c ← ((a.s=0) ^ (af > bf)) ? G : L
            endif
        endif
    elseif a.t=NORM then
        c ← (a.s=0) ? G : L
    elseif b.t=NORM then
        c ← (b.s=0) ? G : L
    elseif a.t=ZERO and b.t=ZERO then
        c ← E
    else
        assert FALSE // should have covered all the cases above
    endif
enddef
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defc-*-fmul(a,b)as / 
    if a.t=NORM and b.t=NORM then
        c.s ← a.s ^ b.s
        c.t ← NORM
        c.e ← a.e + b.e
        c.f ← a.f * b.f
    // priority is given to b operand for NaN propagation
    elseif (b.t=SNAN) or (b.t=QNAN) then
        c.s ← a.s ^ b.s
        c.t ← b.t
        c.e ← b.e
        c.f ← b.f
    elseif (a.t=SNAN) or (a.t=QNAN) then
        c.s ← a.s ^ b.s
        c.t ← a.t
        c.e ← a.e
        c.f ← a.f
    elseif a.t=ZERO and b.t=INFINITY then
        c ← DEFAULTSNAN // Invalid
    elseif a.t=INFINITY and b.t=ZERO then
        c ← DEFAULTSNAN // Invalid
    elseif a.t=ZERO or b.t=ZERO then
        c.s ← a.s ^ b.s
        c.t ← ZERO
    else
        assert FALSE // should have covered all the cases above
    endif
enddef
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defc fdivr(a.b) as 2570 
    if a.t=NORM and b.t=NORM then
        c.s ← a.s ^ b.s
        c.t ← NORM
        c.e ← a.e - b.e + 256
        c.f ← (a.f || 0^{256}) / b.f
    // priority is given to b operand for NaN propagation
    elseif (b.t=SNAN) or (b.t=QNAN) then
        c.s ← a.s ^ b.s
        c.t ← b.t
        c.e ← b.e
        c.f ← b.f
    elseif (a.t=SNAN) or (a.t=QNAN) then
        c.s ← a.s ^ b.s
        c.t ← a.t
        c.e ← a.e
        c.f ← a.f
    elseif a.t=ZERO and b.t=INFINITY then
        c ← DEFAULTSNAN // Invalid
    elseif a.t=INFINITY and b.t=INFINITY then
        c ← DEFAULTSNAN // Invalid
    elseif a.t=ZERO then
        c.s ← a.s ^ b.s
        c.t ← ZERO
    elseif a.t=INFINITY then
        c.s ← a.s ^ b.s
        c.t ← INFINITY
    else
        assert FALSE // should have covered all the cases above
    endif
enddef

def msb-*- findmsb(a) as 

MAXF-*- 2 18 // Largest possible f value after matrix multiply 
forj-*-OtoMAXF ¥1 

" %iaxf.i..j = (0 MAXF - 1 -i|| 1) then 
msb-*- j 

endif 
endfor 
enddef 
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Def ai-#- PackF(prec,a,round) as 
case a.t of 
NORM: 

msb findmsb(a.f) 

m -*-msb-1-fbtts(prec) //1sb for normal 

rdn^- -ebias(prec)-a.e-1-fbits(prec) // 1sb if a denormal 

rb^-(m>rdn)?rn:rdn 

if rb < 0 then 

a'fr-^- a.f msb .i..o||0- rb 
eadj-*-0 

else 

case round of 

C: 

s ^-o msb rb || (-a.s)rb 

s ^_0 msb - rb || (a.s)fb 
N, NONE: 

s ^-0msb-rb,|. afrb||af rb-1 

A. 

if a.frb-i..o * 0 then 

raise FloatingPointArithmetic // Inexact 



Z: 



endif 
s-*-0 



endcase 

v ^(0||a.W.o)*(0||s) 
ifv mS b=1then 

aifr-*-v msb . 1irb 

eadj 0 

else 

aifr-^- 0 fbits (Pwc) 
eadj 1 
endif 
endif 

aien a.e + msb - 1 + eadj + ebias(prec) 
if aien s 0 then 

if round = NONE then 

ai — a.s 1 1 o ebits <P rec >| | aifr 

else 

raise FloatingPointArithmetic //Underflow 
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endcase 
defdef 



2570 
endif 

elseif aien > 1 ebits(prec) {nen 
if round = NONE then 

//default: round-to-nearest overflow handlinq 
ai -*-a.s|| iebits(prec)|i 0 fbits(prec) 9 

else 

raise FloatingPointArithmetic // Overflow 
endif 

else 

endif 3 ' ^ " aienebils (P rec M-0 1 1 aifr 

SNAN: 

if round * NONE then 

raise FloatingPointArithmetic //Invalid 
endif 

if -a.e < fbits(prec) then 

ai -t-a.s||iebits(prec)|| a .f-a. e -1..ol I 0 Wte (P"e)*" 

6ISG 

tab a.f. a . e -1-fbits(prec)^1. 0 *0 
endif"' 3 8 1 1 1ebitS<PreC) Ha ^e-1..-a.e-1-f bi ts(precK2 ||1sb 
QNAN: 

if -a.e < fbits(prec) then 

else ai ^" a s " 1ebit8(prec) !l a.f- a .e-i..o||0 ,bUs (P re cH-» 

1sb-#- a.f. a .e-l-fbits(precH..o * 0 
endif ai — a.s||iebits(prec),| a.f-a. e -1..-a.a-1-fbits(prec)*2|| 1sb 
ZERO: 

ai-*- a.sll o ebits (P rec > | f nfbits(prec) 
INFINITY: 

ai a.s 1 1 lebits(proc) 1 1 Qfbits(prec) 



FIG. 25D-9 



^ — 2570 

def ai ← fsinkr(prec, a, round) as
    case a.t of
        NORM:
            msb ← findmsb(a.f)
            rb ← -a.e
            if rb < 0 then
                aifr ← a.f_{msb..0} || 0^{-rb}
                aims ← msb - rb
            else
                case round of
                    C, C.D:
                        s ← 0^{msb-rb} || (~ai.s)^{rb}
                    F, F.D:
                        s ← 0^{msb-rb} || (ai.s)^{rb}
                    N, NONE:
                        s ← 0^{msb-rb} || ~ai.f_{rb} || ai.f_{rb}^{rb-1}
                    X:
                        if ai.f_{rb-1..0} ≠ 0 then
                            raise FloatingPointArithmetic // Inexact
                        endif
                        s ← 0
                    Z, Z.D:
                        s ← 0
                endcase
                v ← (0 || a.f_{msb..0}) + (0 || s)
                if v_{msb} = 1 then
                    aims ← msb + 1 - rb
                else
                    aims ← msb - rb
                endif
                aifr ← v_{aims..rb}
            endif
            if aims > prec then
                case round of
                    C.D, F.D, NONE, Z.D:
                        ai ← a.s || (~a.s)^{prec-1}
                    C, F, N, X, Z:
                        raise FloatingPointArithmetic // Overflow
                endcase
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elseif a.s = 0 then * 
                ai ← aifr
            else
                ai ← -aifr
            endif
        ZERO:
            ai ← 0^{prec}
        SNAN, QNAN:
            case round of
                C.D, F.D, NONE, Z.D:
                    ai ← 0^{prec}
                C, F, N, X, Z:
                    raise FloatingPointArithmetic // Invalid
            endcase
        INFINITY:
            case round of
                C.D, F.D, NONE, Z.D:
                    ai ← a.s || (~a.s)^{prec-1}
                C, F, N, X, Z:
                    raise FloatingPointArithmetic // Invalid
            endcase
    endcase
enddef



def c ← frecrest(a) as
    b.s ← 0
    b.t ← NORM
    b.e ← 0
    b.f ← 1
    c ← fest(fdiv(b,a))
enddef

def c ← frsqrest(a) as
    b.s ← 0
    b.t ← NORM
    b.e ← 0
    b.f ← 1
    c ← fest(fsqr(fdiv(b,a)))
enddef
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— 2570 

def c ← fest(a) as
    if (a.t=NORM) then
        msb ← findmsb(a.f)
        a.e ← a.e + msb - 13
        a.f ← a.f_{msb..msb-12} || 1
    else
        c ← a
    endif
enddef

def c ← fsqr(a) as
    if (a.t=NORM) and (a.s=0) then
        c.s ← 0
        c.t ← NORM
        if (a.e_{0} = 1) then
            c.e ← (a.e-127) / 2
            c.f ← sqr(a.f || 0^{127})
        else
            c.e ← (a.e-128) / 2
            c.f ← sqr(a.f || 0^{128})
        endif
    elseif (a.t=SNAN) or (a.t=QNAN) or a.t=ZERO or ((a.t=INFINITY) and (a.s=0)) then
        c ← a
    elseif ((a.t=NORM) or (a.t=INFINITY)) and (a.s=1) then
        c ← DEFAULTSNAN // Invalid
    else
        assert FALSE // should have covered all the cases above
    endif
enddef
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Operation codes 



GADD.8 


Group add bytes 


G. ADD. 16 


Group add doublets 


G.ADD.32 


Group add quadlets 


G.ADD.64 


Group add octlets ~1 


G. ADD. 128 


Group add hexlet 


G.ADD.L.8 


Group add limit signed bytes 


G.ADD.L.16 


Group add limit signed doublets 


G.ADD.L.32 


Group add limit signed quadlets 


G.ADD.L.64 


Group add limit signed octlets 


G.ADD.L.128 


Group add limit signed hexlet 


G.ADD.L.U.8 


Group add limit unsigned bytes 


G.ADD.L.U.16 


Group add limit unsigned doublets 


G.ADD.L.U.32 


Group add limit unsigned quadlets 


G.ADD.L.U.64 


Group add limit unsigned octlets 


G.ADD.L.U.128 


Group add limit unsigned hexlet 


G.ADD.8.0 


Group add signed bytes check overflow 


G.ADD.16.O 


Group add signed doublets check overflow 


G.ADD.32.0 


Group add signed quadlets check overflow 


G.ADD.64.0 


Group add signed octlets check overflow 


G.ADD. 128.0 


Group add signed hexlet check overflow 


G.ADD.U.8.0 1 


Group add unsigned bytes check overflow 


G.ADD.U.16.0 


Group add unsigned doublets check overflow 


G.ADD.U.32.0 


Group add unsigned quadlets check overflow 


G.ADD.U.64.0 


Group add unsigned octlets check overflow 


G.ADD.U.128.0 


Group add unsigned hexlet check overflow 
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Format 



G.op.size rd=rc,rb 
rd=gopsize(rc,rb) 

31 24 23 18 17 12 11 65 0 

r G.size 1 rd | rc | rb | op 1 

8 " 6 6 6 6 
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Definition 

def Group(op,size,rd,rc,rb)
    c ← RegRead(rc, 128)
    b ← RegRead(rb, 128)
    case op of
        G.ADD:
            for i ← 0 to 128-size by size
                a_{i+size-1..i} ← c_{i+size-1..i} + b_{i+size-1..i}
            endfor
        G.ADD.L:
            for i ← 0 to 128-size by size
                t ← (c_{i+size-1} || c_{i+size-1..i}) + (b_{i+size-1} || b_{i+size-1..i})
                a_{i+size-1..i} ← (t_{size} ≠ t_{size-1}) ? (t_{size} || t_{size-1}^{size-1}) : t_{size-1..0}
            endfor
        G.ADD.L.U:
            for i ← 0 to 128-size by size
                t ← (0^{1} || c_{i+size-1..i}) + (0^{1} || b_{i+size-1..i})
                a_{i+size-1..i} ← (t_{size} ≠ 0) ? (1^{size}) : t_{size-1..0}
            endfor
        G.ADD.O:
            for i ← 0 to 128-size by size
                t ← (c_{i+size-1} || c_{i+size-1..i}) + (b_{i+size-1} || b_{i+size-1..i})
                if t_{size} ≠ t_{size-1} then
                    raise FixedPointArithmetic
                endif
                a_{i+size-1..i} ← t_{size-1..0}
            endfor
        G.ADD.U.O:
            for i ← 0 to 128-size by size
                t ← (0^{1} || c_{i+size-1..i}) + (0^{1} || b_{i+size-1..i})
                if t_{size} ≠ 0 then
                    raise FixedPointArithmetic
                endif
                a_{i+size-1..i} ← t_{size-1..0}
            endfor
    endcase
    RegWrite(rd, 128, a)
enddef
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Operation codes 



G.SET.AND.E.8 


Group set and equal zero bytes 


G.SET.AND.E.16 


Group set and equal zero doublets ~ "1 


G.SET.AND.E.32 


Group set and equal zero quadiets 


G.SET.AND.E.64 


Group set and equal zero octlets 


G.SET.AND.E.128 


Group set and equal zero hexlet ~] 


G.SET.AND.NE.S 


Group set and not equal zero bytes 


G.SET.AND.NE.16 


Group set and not equal zero doublets 


G.SET.AND.NE.32 


Group set and not equal zero quadiets 


G.SET.AND.NE.64 


Group set and not equal zero octlets 


G.SET. AND.NE. 128 


Group set and not equal zero hexlet 


G.SET.E.8 


Group set equal bytes 


G.SET.E.16 


Group set equal doublets 


G.SET.E.32 


Group set equal quadiets 


G.SET.E.64 


Group set equal octlets 


G.SET.E.128 


Group set equal hexlet j 


G.SET.GE.8 


Group set greater equal signed bytes 


GSET.GE.16 


Group set greater equal signed doublets 


G.SET.GE.32 


Group set greater equal signed quadiets 


G.SET.GE.64 


Group set greater equal signed octlets 


G.SET.GE.128 


Group set greater equal signed hexlet 


G.SET.GE.U.8 


Group set greater equal unsigned bytes 


G.SET.GE.U.16 


Group set greater equal unsigned doublets 


G.SET.GE.U.32 


Group set greater equal unsigned quadiets 


G.SET.GE.U.64 


Group set greater equal unsigned octlets 


G.SET.GE.U.128 


Group set greater equal unsigned hexlet 


G.SET.L.8 


Group set signed less bytes 


G.SET.L.16 


Group set signed less doublets 


G.SET.L.32 


Group set signed less quadiets 


G.SET.L.64 


Group set signed less octlets 


G.SET.L.128 


Group set signed less hexlet 


G.SET.L.U.8 


Group set less unsigned bytes 


G.SET.L.U.16 


Group set less unsigned doublets 


G.SET.L.U.32 


Group set less unsigned quadiets 


G.SET.L.U.64 


Group set less unsigned octlets 


G.SET.L.u.128 


Group set less unsigned hexlet 


G.SET.NE.8 


Group set not equal bytes 


G.SET.NE.16 


Group set not equal doublets 


G.SET.NE.32 


Group set not equal quadiets 


G.SET.NE.64 


Group set not equal octlets 


G.SET.NE.128 


Group set not equal hexlet 


G.SUB.8 


Group subtract bytes 


G.SUB.8.0 


Group subtract signed bytes check overflow 
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G.SUB.16 


Group subtract doublets 


G.SUB.16.0 


Group subtract signed doublets check overflow 


G.SUB.32 


Group subtract quadlets 


G.SUB.32.0 


Group subtract signed quadlets check overflow 


G.SUB.64 


Group subtract octlets 


G.SUB.64.0 


Group subtract signed octlets check overflow 


G. SUB. 128 


Group subtract hexlet 


G.SUB. 128.0 


Group subtract signed hexlet check overflow 


G.SUB.L.8 


Group subtract limit signed bytes 


G.SUB.L.16 


Group subtract limit signed doublets 


G.SUB.L.32 


Group subtract limit signed quadlets 


G.SUB.L.64 


Group subtract limit signed octlets 


G.SUB.L.128 


Group subtract limit signed hexlet 1 


G.SUB.L.U.8 


Group subtract limit unsigned bytes 


G.SUB.L.U.16 


Group subtract limit unsigned doublets 


G.SUB.L.U.32 


Group subtract limit unsigned quadlets 


G.SUB.L.U.64 


Group subtract limit unsigned octlets 


G.SUB.L.U.128 


Group subtract limit unsigned hexlet 


G.SUB.U.8.0 


Group subtract unsigned bytes check overflow 


G.SUB.U.16.0 


Group subtract unsigned doublets check overflow 


G.SUB.U.32.0 


Group subtract unsigned quadlets check overflow 


G.SUB.U.64.0 


Group subtract unsigned octlets check overflow 


G.SUB.U. 128.0 


Group subtract unsigned hexlet check overflow 
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Format 



G.op.size rd=rb,rc 
rd=gopsize(rb,rc) 

31 24 23 18 17 12 11 6 5 0 

\ Csize | rd 1 rc | rb 1 op 

8 6 6 6 6 
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Definition 

def GroupReversed(op,size,rd,rc,rb)
    c ← RegRead(rc, 128)
    b ← RegRead(rb, 128)
    case op of
        G.SUB:
            for i ← 0 to 128-size by size
                a_{i+size-1..i} ← b_{i+size-1..i} - c_{i+size-1..i}
            endfor
        G.SUB.L:
            for i ← 0 to 128-size by size
                t ← (b_{i+size-1} || b_{i+size-1..i}) - (c_{i+size-1} || c_{i+size-1..i})
                a_{i+size-1..i} ← (t_{size} ≠ t_{size-1}) ? (t_{size} || t_{size-1}^{size-1}) : t_{size-1..0}
            endfor
        G.SUB.L.U:
            for i ← 0 to 128-size by size
                t ← (0^{1} || b_{i+size-1..i}) - (0^{1} || c_{i+size-1..i})
                a_{i+size-1..i} ← (t_{size} ≠ 0) ? 0^{size} : t_{size-1..0}
            endfor
        G.SUB.O:
            for i ← 0 to 128-size by size
                t ← (b_{i+size-1} || b_{i+size-1..i}) - (c_{i+size-1} || c_{i+size-1..i})
                if (t_{size} ≠ t_{size-1}) then
                    raise FixedPointArithmetic
                endif
                a_{i+size-1..i} ← t_{size-1..0}
            endfor
        G.SUB.U.O:
            for i ← 0 to 128-size by size
                t ← (0^{1} || b_{i+size-1..i}) - (0^{1} || c_{i+size-1..i})
                if (t_{size} ≠ 0) then
                    raise FixedPointArithmetic
                endif
                a_{i+size-1..i} ← t_{size-1..0}
            endfor
        G.SET.E:
            for i ← 0 to 128-size by size
                a_{i+size-1..i} ← (b_{i+size-1..i} = c_{i+size-1..i})^{size}
            endfor
        G.SET.NE:
            for i ← 0 to 128-size by size
                a_{i+size-1..i} ← (b_{i+size-1..i} ≠ c_{i+size-1..i})^{size}
            endfor
        G.SET.AND.E:
            for i ← 0 to 128-size by size
                a_{i+size-1..i} ← ((b_{i+size-1..i} and c_{i+size-1..i}) = 0)^{size}
            endfor
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G.SET.AND.NE: 

            for i ← 0 to 128-size by size
                a_{i+size-1..i} ← ((b_{i+size-1..i} and c_{i+size-1..i}) ≠ 0)^{size}
            endfor
        G.SET.L:
            for i ← 0 to 128-size by size
                a_{i+size-1..i} ← ((rc = rb) ? (b_{i+size-1..i} < 0) : (b_{i+size-1..i} < c_{i+size-1..i}))^{size}
            endfor
        G.SET.GE:
            for i ← 0 to 128-size by size
                a_{i+size-1..i} ← ((rc = rb) ? (b_{i+size-1..i} ≥ 0) : (b_{i+size-1..i} ≥ c_{i+size-1..i}))^{size}
            endfor
        G.SET.L.U:
            for i ← 0 to 128-size by size
                a_{i+size-1..i} ← ((rc = rb) ? (b_{i+size-1..i} > 0) :
                    ((0 || b_{i+size-1..i}) < (0 || c_{i+size-1..i})))^{size}
            endfor
        G.SET.GE.U:
            for i ← 0 to 128-size by size
                a_{i+size-1..i} ← ((rc = rb) ? (b_{i+size-1..i} ≤ 0) :
                    ((0 || b_{i+size-1..i}) ≥ (0 || c_{i+size-1..i})))^{size}
            endfor
    endcase
    RegWrite(rd, 128, a)
enddef
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Operation codes 



E.CON.o 


Ensemble convolve signed bytes 


E.CON.lo 


Ensemble convolve signed doublets 


E.CON.32 


Ensemble convolve signed quadlets 


E.CON.64 


Ensemble convolve signed octlets 


E.CON.L.o 


Ensemble convolve complex bytes 


E.CON.C.lo 


Ensemble convolve complex doublets 


E.CON.C.32 


Ensemble convolve complex quadlets 


E.CON.M.d 


Ensemble convolve mixed-signed bytes 


E.C0N.M.16 


Ensemble convolve mixed-signed doublets 


E.CON.M.32 


Ensemble convolve mixed-signed quadlets 


E.CON.M.64 


Ensemble convolve mixed-signed octlets 


E.C0N.U.8 


Ensemble convolve unsigned bytes 


E.C0N.U.16 


Ensemble convolve unsigned doublets 


E.CON.U.32 


Ensemble convolve unsigned quadlets 


E.CON.U.64 


Ensemble convolve unsigned octlets 


E.DIV.64 


Ensemble divide signed octlets 


E.DIV.U.64 


Ensemble divide unsigned octlets 


E.MUL.8 


Ensemble multiply signed bytes 


EMUL.16 


Ensemble multiply signed doublets 


E.MUL.32 


Ensemble multiply signed quadlets 


E.MUL.64 


Ensemble multiply signed octlets 


E.MUL.SUM.8 


Ensemble multiply sum signed bytes 


E.MUL.SUM.16 


Ensemble multiply sum signed doublets 


C X/fTTT OTTX/f 

b.MUL.dUM.32 


Ensemble multiply sum signed quadlets 


C X/fTTT CTTXif £/l 

fc.MUL.bUM.o4 


Ensemble multiply sum signed octlets 


t.MUL.t.o 


Ensemble complex multiply bytes 


TT X/fTTT C 1 £ 

t.MUL.C.lo 


Ensemble complex multiply doublets 


C X/fTTT /"* "51 


Ensemble complex multiply quadlets 


E.MUL.M.5 


Ensemble multiply mixed-signed bytes 


E.MUL.M.lo 


Ensemble multiply mixed-signed doublets 


E.MUL.M.32 


Ensemble multiply mixed-signed quadlets 


E.MUL.M.64 


T*^ » 1 1 . ' 1 * J * 1 . • A. 

Ensemble multiply mixed-signed octlets 


T^ X XT TV TV A 

E.MUL.P.8 


T^ ■ I f . • | f • • • 

Ensemble multiply polynomial bytes 


E.MUL.P.16 


Ensemble multiply polynomial doublets 


TT XjfTTT P 10 


Ensemble multiply polynomial quadlets 


E.MUL.P.64 


Ensemble multiply polynomial octlets 


E.MUL.SUM.C.8 


Ensemble multiply sum complex bytes 


E.MUL.SUM.C.16 


Ensemble multiply sum complex doublets 


E.MUL.SUM.C.32 


Ensemble multiply sum complex quadlets 


E.MUL.SUM.M.8 


Ensemble multiply sum mixed-signed bytes 


E.MUL.SUM.M.16 


Ensemble multiply sum mixed-signed doublets 


E.MUL.SUM.M.32 


Ensemble multiply sum mixed-signed quadlets 


E.MUL.SUM.M.64 


Ensemble multiply sum mixed-signed octlets 
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E.MUL.SUM.U.8 


Ensemble multiply sum unsigned bytes 


E.MUL.SUM.U.16 


Ensemble multiply sum unsigned doublets 


E.MUL.SUM.U.32 


Ensemble multiply sum unsigned quadlets 


E.MUL.SUM.U.64 


Ensemble multiply sum unsigned octlets 


E.MUL.U.8 


Ensemble multiply unsigned bytes 


E.MUL.U.16 


Ensemble multiply unsigned doublets 


E.MUL.U.32 


Ensemble multiply unsigned quadlets 


E.MUL.U.64 


Ensemble multiply unsigned octlets 
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Format 



E.op.size rd=rc,rb 
rd=copsize(rc,rb) 

31 24 23 1817 1211 65 0 

I E.size | rd 1 rc I rb | op 1 

8 6 6 ^6 6 
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Definition 

def mul(size,h,vs,v,i,ws,w,j) as

mul <- ((vs & v_{size-1+i})^{h-size} || v_{size-1+i..i}) * ((ws & w_{size-1+j})^{h-size} || w_{size-1+j..j})
enddef

def c <- PolyMultiply(size,a,b) as
p[0] <- 0^{2*size}

for k <- 0 to size-1

p[k+1] <- p[k] ^ a_{k} ? (0^{size-k} || b || 0^{k}) : 0^{2*size}
endfor
c <- p[size]
enddef

def Ensemble(op,size,rd,rc,rb)
c <- RegRead(rc, 128) 
b <- RegRead(rb, 128) 
case op of 

E.MUL, E.MUL.C, E.MUL.SUM, E.MUL.SUM.C, E.CON, E.CON.C, E.DIV:

cs <- bs <- 1
E.MUL.M, E.MUL.SUM.M, E.CON.M:

cs <- 0

bs <- 1

E.MUL.U, E.MUL.SUM.U, E.CON.U, E.DIV.U, E.MUL.P:
cs <- bs <- 0

endcase 
case op of 

E.MUL, E.MUL.U, E.MUL.M: 
for i <- 0 to 64-size by size

a_{2*(i+size)-1..2*i} <- mul(size,2*size,cs,c,i,bs,b,i)
endfor 
E.MUL.P: 

for i <- 0 to 64-size by size

a_{2*(i+size)-1..2*i} <- PolyMultiply(size,c_{size-1+i..i},b_{size-1+i..i})
endfor 
E.MUL.C: 

for i «- 0 to 64-size by size 
if (i and size) = 0 then 

p <- mul(size,2*size,1,c,i,1,b,i) - mul(size,2*size,1,c,i+size,1,b,i+size)

else

p <- mul(size,2*size,1,c,i,1,b,i+size) + mul(size,2*size,1,c,i,1,b,i+size)

endif

a_{2*(i+size)-1..2*i} <- p
endfor 

E.MUL.SUM, E.MUL.SUM.U, E.MUL.SUM.M: 
p[0] <- 0^{128}

for i <- 0 to 128-size by size 

p[i+size] <- p[i] + mul(size,128,cs,c,i,bs,b,i) 
endfor 
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a<-p[128] 
E.MUL.SUM.C: 

p[0] <- 0^{64}

p[size] <- 0^{64}

for i <- 0 to 128-size by size
if (i and size) = 0 then

p[i+2*size] <- p[i] + mul(size,64,1,c,i,1,b,i)

- mul(size,64,1,c,i+size,1,b,i+size)

else

p[i+2*size] <- p[i] + mul(size,64,1,c,i,1,b,i+size)
+ mul(size,64,1,c,i+size,1,b,i)

endif
endfor

a <- p[128+size] || p[128]

E.CON, E.CON.U, E.CON.M: 
p[0] <- 0^{128}

for j <- 0 to 64-size by size

for i <- 0 to 64-size by size

p[j+size]_{2*(i+size)-1..2*i} <- p[j]_{2*(i+size)-1..2*i} +
mul(size,2*size,cs,c,i+64-j,bs,b,j)

endfor 
endfor 
a<-p[64] 
E.CON.C: 

p[0] <- 0^{128}

for j <- 0 to 64-size by size

for i <- 0 to 64-size by size

if ((~i) and j and size) = 0 then

p[j+size]_{2*(i+size)-1..2*i} <- p[j]_{2*(i+size)-1..2*i} +
mul(size,2*size,1,c,i+64-j,1,b,j)

else

p[j+size]_{2*(i+size)-1..2*i} <- p[j]_{2*(i+size)-1..2*i} -
mul(size,2*size,1,c,i+64-j+2*size,1,b,j)

endif 
endfor 
endfor 
a<-p[64] 
E.DIV: 

if (b = 0) or ( (c = (1 || 0^{63})) and (b = 1^{64}) ) then
a <- undefined
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else 

q <- c / b
r <- c - q*b

a <- r_{63..0} || q_{63..0}

endif
E.DIV.U:

if b = 0 then

a <- undefined

else

q <- (0 || c) / (0 || b)
r <- c - (0 || q)*(0 || b)

a <- r_{63..0} || q_{63..0}

endif

endcase 

RegWrite(rd, 128, a) 
enddef 
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Floating-point function Definitions 

def eb <- ebits(prec) as
case prec of
16:

eb <- 5

32:

eb <- 8

64:

eb <- 11

128:

eb <- 15

endcase
enddef

def eb <- ebias(prec) as

eb <- 0 || 1^{ebits(prec)-1}
enddef

def fb <- fbits(prec) as 

fb <- prec - 1 - eb 
enddef 

def a <- F(prec, ai) as
a.s <- ai_{prec-1}
ae <- ai_{prec-2..fbits(prec)}
af <- ai_{fbits(prec)-1..0}

if ae = 1^{ebits(prec)} then

if af = 0 then

a.t <- INFINITY

elseif af_{fbits(prec)-1} then
a.t <- SNaN
a.e <- -fbits(prec)
a.f <- 1 || af_{fbits(prec)-2..0}

else

a.t <- QNaN
a.e <- -fbits(prec)
a.f <- af

endif
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eiseif ae = 0 then 
if af = 0 then

a.t <- ZERO

else

a.t <- NORM

a.e <- 1-ebias(prec)-fbits(prec)
a.f <- 0 || af

endif

else

a.t <- NORM

a.e <- ae-ebias(prec)-fbits(prec)
a.f <- 1 || af

endif
enddef 

def a <- DEFAULTQNAN as 

a.s<-0 

a.t <- QNAN 

a.e <- -1 

a.f<-l 
enddef 

def a <- DEFAULTSNAN as 

a.s<-0 

a.t <- SNAN 

a.e <- -1 

a.f<-l 
enddef 

def fadd(a,b) as faddr(a,b,N) enddef

def c <- faddr(a,b,round) as 

if a.t=NORM and b.t=NORM then 

// d,e are a,b with exponent aligned and fraction adjusted 
if a.e > b.e then
d <- a
e.t <- b.t
e.s <- b.s
e.e <- a.e

e.f <- b.f || 0^{a.e-b.e}
else if a.e < b.e then
d.t <- a.t
d.s <- a.s
d.e <- b.e
d.f <- a.f || 0^{b.e-a.e}
e <- b
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endif 
c.t <- d.t 
c.e <- d.e 
if d.s = e.s then

c.s <- d.s

c.f <- d.f + e.f
elseif d.f > e.f then

c.s <- d.s

c.f <- d.f - e.f
elseif d.f < e.f then

c.s <- e.s

c.f <- e.f - d.f

else

c.s <- round=F
c.t <- ZERO

endif

// priority is given to b operand for NaN propagation 
elseif (b.t=SNAN) or (b.t=QNAN) then 
c <— b 

elseif (a.t=SNAN) or (a.t=QNAN) then 
c«-a 

elseif a.t=ZERO and b.t=ZERO then 
c.t <- ZERO 

c.s <- (a.s and b.s) or (round=F and (a.s or b.s)) 
// NULL values are like zero, but do not combine with ZERO to alter sign 
elseif a.t=ZERO or a.t=NULL then 

c <- b 

elseif b.t=ZERO or b.t=NULL then 
c <- a 

elseif a.t=INFINITY and b.t=INFINITY then 
if a.s ≠ b.s then

c <- DEFAULTSNAN // Invalid 

else 

c <- a 

endif 

elseif a.t=INFINITY then 
c«-a 

elseif b.t=INFINITY then 
c«-b 

else 

assert FALSE // should have covered al the cases above 

endif 
enddef 

def b <- fneg(a) as

b.s <- ~a.s

b.t <- a.t

b.e <- a.e

b.f <- a.f
enddef
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def fcubr(a,b,round) as faddr(a,fheg(b),round) enddef 

def frsub(a,b) as frsubr(a,b,N) enddef 

def frsubr(a,b,round) as faddr(fneg(a),b,round) enddef

def c <- fcom(a,b) as 

if (a.t=SNAN) or (a.t=QNAN) or (b.t=SNAN) or (b.t=QNAN) then
c <- U

elseif a.t=INFINITY and b.t=INFINITY then
if a.s ≠ b.s then

c <- (a.s=0) ? G: L

else

c <- E

endif

elseif a.t=INFINITY then

c <- (a.s=0) ? G: L
elseif b.t=INFINITY then

c <- (b.s=0) ? G: L
elseif a.t=NORM and b.t=NORM then

if a.s ≠ b.s then

c <- (a.s=0) ? G: L

else

if a.e > b.e then
af <- a.f

bf <- b.f || 0^{a.e-b.e}

else

af <- a.f || 0^{b.e-a.e}
bf <- b.f

endif

if af = bf then
c <- E

else

c <- ((a.s=0) ^ (af > bf)) ? G : L

endif

endif

elseif a.t=NORM then

c <- (a.s=0) ? G: L
elseif b.t=NORM then

c <- (b.s=0) ? G: L
elseif a.t=ZERO and b.t=ZERO then 

c <- E 

else 

assert FALSE // should have covered al the cases above 

endif 
enddef 
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def c <- finul(a,b) as 

if a.t=NORM and b.t=NORM then

c.s <- a.s ^ b.s

c.t <- NORM

c.e <- a.e + b.e

c.f <- a.f * b.f
// priority is given to b operand for NaN propagation
elseif (b.t=SNAN) or (b.t=QNAN) then

c.s <- a.s ^ b.s

c.t <- b.t

c.e <- b.e

c.f <- b.f

elseif (a.t=SNAN) or (a.t=QNAN) then
c.s <- a.s ^ b.s
c.t <- a.t
c.e <- a.e
c.f <- a.f

elseif a.t=ZERO and b.t=INFINITY then

c <- DEFAULTSNAN // Invalid
elseif a.t=INFINITY and b.t=ZERO then

c <- DEFAULTSNAN // Invalid
elseif a.t=ZERO or b.t=ZERO then

c.s <- a.s ^ b.s

c.t <- ZERO

else

assert FALSE // should have covered all the cases above

endif
enddef 

def c <- fdivr(a,b) as 

if a.t=NORM and b.t=NORM then

c.s <- a.s ^ b.s

c.t <- NORM

c.e <- a.e - b.e + 256

c.f <- (a.f || 0^{256}) / b.f
// priority is given to b operand for NaN propagation
elseif (b.t=SNAN) or (b.t=QNAN) then

c.s <- a.s ^ b.s

c.t <- b.t

c.e <- b.e

c.f <- b.f

elseif (a.t=SNAN) or (a.t=QNAN) then
c.s <- a.s ^ b.s
c.t <- a.t
c.e <- a.e
c.f <- a.f
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elseif a.t=ZERO and b.t=ZERO then 

c <- DEFAULTSNAN // Invalid
elseif a.t=INFINITY and b.t=INFINITY then

c <- DEFAULTSNAN // Invalid
elseif a.t=ZERO then

c.s <- a.s ^ b.s

c.t <- ZERO
elseif a.t=INFINITY then

c.s <- a.s ^ b.s

c.t <- INFINITY

else 

assert FALSE // should have covered al the cases above 

endif 
enddef 

def msb <- findmsb(a) as 

MAXF <- 2^{18} // Largest possible f value after matrix multiply
for j <- 0 to MAXF

if a_{MAXF-1..j} = (0^{MAXF-1-j} || 1) then
msb <- j

endif
endfor
enddef 

def ai <- PackF(prec,a,round) as 
case a.t of 
NORM: 

msb <- findmsb(a.f)

rn <- msb-l-fbits(prec) // lsb for normal 

rdn <- -ebias(prec)-a.e-l-fbits(prec) // lsb if a denormal 

rb <- (rn > rdn) ? rn : rdn 
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if rb < 0 then 

aifr <- a.f_{msb-1..0} || 0^{-rb}
eadj <- 0

else

case round of

C:

s <- 0^{msb-rb} || (~a.s)^{rb}

F:

s <- 0^{msb-rb} || (a.s)^{rb}
N, NONE:

s <- 0^{msb-rb} || ~a.f_{rb} || (a.f_{rb})^{rb-1}

X:

if a.f_{rb-1..0} ≠ 0 then

raise FloatingPointArithmetic // Inexact

endif
s <- 0

Z:

s <- 0

endcase

v <- (0 || a.f_{msb..0}) + (0 || s)
if v_{msb} = 1 then

aifr <- v_{msb-1..rb}
eadj <- 0

else

aifr <- 0^{fbits(prec)}
eadj <- 1

endif

endif

aien <- a.e + msb - 1 + eadj + ebias(prec)
if aien ≤ 0 then

if round = NONE then

ai <- a.s || 0^{ebits(prec)} || aifr

else

raise FloatingPointArithmetic //Underflow

endif

elseif aien ≥ 1^{ebits(prec)} then
if round = NONE then

//default: round-to-nearest overflow handling
ai <- a.s || 1^{ebits(prec)} || 0^{fbits(prec)}

else

raise FloatingPointArithmetic //Underflow

endif

else

ai <- a.s || aien_{ebits(prec)-1..0} || aifr

endif 
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SNAN: 

if round ≠ NONE then

raise FloatingPointArithmetic //Invalid

endif

if -a.e < fbits(prec) then

ai <- a.s || 1^{ebits(prec)} || a.f_{-a.e-1..0} || 0^{fbits(prec)+a.e}

else

lsb <- a.f_{-a.e-1-fbits(prec)+1..0} ≠ 0

ai <- a.s || 1^{ebits(prec)} || a.f_{-a.e-1..-a.e-1-fbits(prec)+2} || lsb

endif
QNAN:

if -a.e < fbits(prec) then

ai <- a.s || 1^{ebits(prec)} || a.f_{-a.e-1..0} || 0^{fbits(prec)+a.e}

else

lsb <- a.f_{-a.e-1-fbits(prec)+1..0} ≠ 0
ai <- a.s || 1^{ebits(prec)} || a.f_{-a.e-1..-a.e-1-fbits(prec)+2} || lsb

endif
ZERO:

ai <- a.s || 0^{ebits(prec)} || 0^{fbits(prec)}
INFINITY:

ai <- a.s || 1^{ebits(prec)} || 0^{fbits(prec)}

endcase
enddef

def ai <- fsinkr(prec, a, round) as 
case a.t of
NORM:

msb <- findmsb(a.f)
rb <- -a.e
if rb ≤ 0 then

aifr <- a.f_{msb..0} || 0^{-rb}
aims <- msb - rb

else

case round of

C, C.D:

s <- 0^{msb-rb} || (~ai.s)^{rb}

F, F.D:

s <- 0^{msb-rb} || (ai.s)^{rb}

N, NONE:

s <- 0^{msb-rb} || ~ai.f_{rb} || (ai.f_{rb})^{rb-1}

X:

if ai.f_{rb-1..0} ≠ 0 then

raise FloatingPointArithmetic // Inexact

endif
s <- 0

Z, Z.D:
s <- 0
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• endcase 
v <- (0 || a.f_{msb..0}) + (0 || s)
if v_{msb} = 1 then

aims <- msb + 1 - rb

else

aims <- msb - rb

endif

aifr <- v_{aims..rb}

endif

if aims > prec then
case round of

C.D, F.D, NONE, Z.D:
ai <- a.s || (~a.s)^{prec-1}

C, F, N, X, Z:

raise FloatingPointArithmetic // Overflow

endcase
elseif a.s = 0 then
ai <- aifr

else

ai <- -aifr

endif
ZERO:

ai <- 0^{prec}
SNAN, QNAN:
case round of

C.D, F.D, NONE, Z.D:

ai <- 0^{prec}
C, F, N, X, Z:

raise FloatingPointArithmetic // Invalid

endcase
INFINITY:

case round of

C.D, F.D, NONE, Z.D:

ai <- a.s || (~a.s)^{prec-1}
C, F, N, X, Z:

raise FloatingPointArithmetic // Invalid

endcase

endcase
enddef 



def c «- frecrest(a) as 
b.s<-0 
b.t <- NORM 
b.e<-0 
b.f<-l 

c *- fest(fdiv(b,a)) 
enddef 



FIG. 29-9 



def c <- frsqrest(a) as 
b.s <- 0 
b.t <- NORM 
b.e <- 0 
b.f<- 1 

c «- fest(fsqr(fdiv(b,a))) 
enddef 

def c <- fest(a) as

if (a.t=NORM) then

msb <- findmsb(a.f)
a.e <- a.e + msb - 13
a.f <- a.f_{msb..msb-12} || 1

else

c <- a

endif
enddef

def c <- fsqr(a) as

if (a.t=NORM) and (a.s=0) then
c.s <- 0
c.t <- NORM
if (a.e_{0} = 1) then

c.e <- (a.e-127)/2
c.f <- sqr(a.f || 0^{127})

else

c.e <- (a.e-128)/2
c.f <- sqr(a.f || 0^{128})

endif

elseif (a.t=SNAN) or (a.t=QNAN) or a.t=ZERO or ((a.t=INFINITY) and (a.s==0)) then 
c <~ a 

elseif ((a.t=NORM) or (a.t=INFINITY)) and (a.s=l) then 
c <- DEFAULTSNAN // Invalid 

else 

assert FALSE // should have covered al the cases above 

endif 
enddef 
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Operation codes 



E.ADD.F.16 


Ensemble add floating-point half 


E.ADD.F.16.C


Ensemble add floating-point half ceiling 


E.ADD.F.16.F 


Ensemble add floating-point half floor 


E.ADD.F.16.N 


Ensemble add floating-point half nearest 


E.ADD.F.16.X 


Ensemble add floating-point half exact 


E.ADD.F.16.Z


Ensemble add floating-point half zero 


E.ADD.F.32 


Ensemble add floating-point single 


E.ADD.F.32.C 


Ensemble add floating-point single ceiling 


E.ADD.F.32.F 


Ensemble add floating-point single floor 


E.ADD.F.32.N 


Ensemble add floating-point single nearest 


E.ADD.F.32.X 


Ensemble add floating-point single exact 


E.ADD.F.32.Z 


Ensemble add floating-point single zero


E.ADD.F.64 


Ensemble add floating-point double 


E.ADD.F.64.C 


Ensemble add floating-point double ceiling 


E.ADD.F.64.F 


Ensemble add floating-point double floor 


E.ADD.F.64.N 


Ensemble add floating-point double nearest 


E.ADD.F.64.X 


Ensemble add floating-point double exact 


E.ADD.F.64.Z 


Ensemble add floating-point double zero 


E.ADD.F.128 


Ensemble add floating-point quad 


E.ADD.F.128.C 


Ensemble add floating-point quad ceiling 


E.ADD.F.128.F 


Ensemble add floating-point quad floor 


E.ADD.F.128.N 


Ensemble add floating-point quad nearest 


E.ADD.F.128.X


Ensemble add floating-point quad exact 


E.ADD.F.128.Z 


Ensemble add floating-point quad zero 


E.DIV.F.16


Ensemble divide floating-point half 


E.DIV.F.16.C 


Ensemble divide floating-point half ceiling 


E.DIV.F.16.F 


Ensemble divide floating-point half floor 


E.DIV.F.16.N


Ensemble divide floating-point half nearest 


E.DIV.F.16.X


Ensemble divide floating-point half exact 


E.DIV.F.16.Z 


Ensemble divide floating-point half zero


E.DIV.F.32 


Ensemble divide floating-point single 


E.DIV.F.32.C 


Ensemble divide floating-point single ceiling 


E.DIV.F.32.F


Ensemble divide floating-point single floor 


E.DIV.F.32.N 


Ensemble divide floating-point single nearest 


E.DIV.F.32.X 


Ensemble divide floating-point single exact 


E.DIV.F.32.Z 


Ensemble divide floating-point single zero 


E.DIV.F.64 


Ensemble divide floating-point double 
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E.DIV.F.64.C 


Ensemble divide floating-point double ceiling 


E.DIV.F.64.F 


Ensemble divide floating-point double floor 


E.DIV.F.64.N 


Ensemble divide floating-point double nearest 


E.DIV.F.64.X 


Ensemble divide floating-point double exact 


E.DIV.F.64.Z 


Ensemble divide floating-point double zero 


E.DIV.F.128


Ensemble divide floating-point quad


E.DIV.F.128.C


Ensemble divide floating-point quad ceiling


E.DIV.F.128.F


Ensemble divide floating-point quad floor


E.DIV.F.128.N


Ensemble divide floating-point quad nearest


E.DIV.F.128.X


Ensemble divide floating-point quad exact


E.DIV.F.128.Z


Ensemble divide floating-point quad zero


E.MUL.C.F.16


Ensemble multiply complex floating-point half


E.MUL.C.F.32


Ensemble multiply complex floating-point single


E.MUL.C.F.64


Ensemble multiply complex floating-point double


E.MUL.F.16


Ensemble multiply floating-point half


E.MUL.F.16.C


Ensemble multiply floating-point half ceiling


E.MUL.F.16.F


Ensemble multiply floating-point half floor


E.MUL.F.16.N


Ensemble multiply floating-point half nearest


E.MUL.F.16.X


Ensemble multiply floating-point half exact


E.MUL.F.16.Z


Ensemble multiply floating-point half zero


E.MUL.F.32


Ensemble multiply floating-point single


E.MUL.F.32.C


Ensemble multiply floating-point single ceiling


E.MUL.F.32.F


Ensemble multiply floating-point single floor


E.MUL.F.32.N


Ensemble multiply floating-point single nearest


E.MUL.F.32.X


Ensemble multiply floating-point single exact


E.MUL.F.32.Z


Ensemble multiply floating-point single zero


E.MUL.F.64 


Ensemble multiply floating-point double 


E.MUL.F.64.C 


Ensemble multiply floating-point double ceiling 


E.MUL.F.64.F 


Ensemble multiply floating-point double floor


E.MUL.F.64.N


Ensemble multiply floating-point double nearest


E.MUL.F.64.X


Ensemble multiply floating-point double exact


E.MUL.F.64.Z 


Ensemble multiply floating-point double zero 


E.MUL.F.128 


Ensemble multiply floating-point quad 


E.MUL.F.128.C 


Ensemble multiply floating-point quad ceiling 


E.MUL.F.128.F 


Ensemble multiply floating-point quad floor 


E.MUL.F.128.N 


Ensemble multiply floating-point quad nearest 


E.MUL.F.128.X


Ensemble multiply floating-point quad exact 


E.MUL.F.128.Z 


Ensemble multiply floating-point quad zero 
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Selection 



class 


op 


prec 


round/trap 


add


EADDF 


16 


32 


64 


128 


noneCFNXZ 


divide 


EDIVF 


16 


32 


64 


128 


noneCFNXZ 


multiply 


EMULF 


16 


32 


64 


128 


noneCFNXZ 


complex multiply 


EMUL.CF 


16 


32 


64 




none 



Format 

E.op.prec.round rd=rc,rb
rd=eopprecround(rc,rb)

31 24 23 18 17 12 11 6 5 0

| E.prec | rd | rc | rb | op.round |

8 6 6 6 6
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.. Definition 

def mul(size,v,i,w,j) as

mul <- fmul(F(size,v_{size-1+i..i}),F(size,w_{size-1+j..j}))
enddef

def EnsembleFloatingPoint(op,prec,round,ra,rb,rc) as 
c <- RegRead(rc, 128)
b <- RegRead(rb, 128)
for i <- 0 to 128-prec by prec
ci <- F(prec,c_{i+prec-1..i})

bi <- F(prec,b_{i+prec-1..i})
case op of

E.ADD.F:

ai <- faddr(ci,bi,round)
E.MUL.F:

ai <- fmul(ci,bi)
E.MUL.C.F:

if (i and prec) then

ai <- fadd(mul(prec,c,i,b,i-prec), mul(prec,c,i-prec,b,i))

else

ai <- fsub(mul(prec,c,i,b,i), mul(prec,c,i+prec,b,i+prec))

endif
E.DIV.F:

ai <- fdiv(ci,bi)

endcase

a_{i+prec-1..i} <- PackF(prec, ai, round)
endfor 

RegWrite(rd, 128, a) 
enddef 
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Operation codes 



E.SUB.F.16 


Ensemble subtract floating-point half 


E.SUB.F.16.C 


Ensemble subtract floating-point half ceiling 


E.SUB.F.16.F 


Ensemble subtract floating-point half floor 


E.SUB.F.16.N 


Ensemble subtract floating-point half nearest 


E.SUB.F.16.Z 


Ensemble subtract floating-point half zero 


E.SUB.F.16.X 


Ensemble subtract floating-point half exact 


E.SUB.F.32 


Ensemble subtract floating-point single


E.SUB.F.32.C 


Ensemble subtract floating-point single ceiling 


E.SUB.F.32.F 


Ensemble subtract floating-point single floor 


E.SUB.F.32.N 


Ensemble subtract floating-point single nearest 


E.SUB.F.32.Z 


Ensemble subtract floating-point single zero 


E.SUB.F.32.X 


Ensemble subtract floating-point single exact 


E.SUB.F.64 


Ensemble subtract floating-point double 


E.SUB.F.64.C


Ensemble subtract floating-point double ceiling 


E.SUB.F.64.F 


Ensemble subtract floating-point double floor 


E.SUB.F.64.N 


Ensemble subtract floating-point double nearest 


E.SUB.F.64.Z


Ensemble subtract floating-point double zero 


E.SUB.F.64.X 


Ensemble subtract floating-point double exact 


E.SUB.F.128 


Ensemble subtract floating-point quad 


E.SUB.F.128.C 


Ensemble subtract floating-point quad ceiling 


E.SUB.F.128.F 


Ensemble subtract floating-point quad floor 


E.SUB.F.128.N


Ensemble subtract floating-point quad nearest 


E.SUB.F.128.Z 


Ensemble subtract floating-point quad zero 


E.SUB.F.128.X


Ensemble subtract floating-point quad exact 
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Selection 



class 


op


prec 


round/trap 


set 


SET. 

E LG 
L GE 


16 32 64 128 


NONE X 


subtract 


SUB 


16 32 64 128 


noneCFNXZ 



Format 



E.op.prec.round rd=rb,rc 
rd=eopprecround(rb,rc) 

31 24 23 18 17 12 11 6 5 0

| E.prec | rd | rc | rb | op.round |

8 6 6 6 6
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Definition 



def EnsembleReversedFloatingPoint(op,prec,round,rd,rc,rb) as
c <- RegRead(rc, 128)
b <- RegRead(rb, 128)
for i <- 0 to 128-prec by prec

ci <- F(prec,c_{i+prec-1..i})

bi <- F(prec,b_{i+prec-1..i})

ai <- frsubr(ci,-bi, round)

a_{i+prec-1..i} <- PackF(prec, ai, round)
endfor 

RegWrite(rd, 128, a) 
enddef 
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Operation codes 





X.COMPRESS.2


Crossbar compress signed pecks


X.COMPRESS.4


Crossbar compress signed nibbles


X.COMPRESS.8


Crossbar compress signed bytes


X.COMPRESS.16


Crossbar compress signed doublets


X.COMPRESS.32


Crossbar compress signed quadlets


X.COMPRESS.64


Crossbar compress signed octlets


X.COMPRESS.128


Crossbar compress signed hexlet


X.COMPRESS.U.2


Crossbar compress unsigned pecks


X.COMPRESS.U.4


Crossbar compress unsigned nibbles


X.COMPRESS.U.8


Crossbar compress unsigned bytes


X.COMPRESS.U.16


Crossbar compress unsigned doublets


X.COMPRESS.U.32


Crossbar compress unsigned quadlets


X.COMPRESS.U.64


Crossbar compress unsigned octlets


X.COMPRESS.U.128


Crossbar compress unsigned hexlet


X.EXPAND.2


Crossbar expand signed pecks


X.EXPAND.4


Crossbar expand signed nibbles


X.EXPAND.8


Crossbar expand signed bytes


X.EXPAND.16


Crossbar expand signed doublets


X.EXPAND.32


Crossbar expand signed quadlets


X.EXPAND.64


Crossbar expand signed octlets


X.EXPAND.128


Crossbar expand signed hexlet


X.EXPAND.U.2


Crossbar expand unsigned pecks


X.EXPAND.U.4


Crossbar expand unsigned nibbles


X.EXPAND.U.8


Crossbar expand unsigned bytes


X.EXPAND.U.16


Crossbar expand unsigned doublets


X.EXPAND.U.32


Crossbar expand unsigned quadlets


X.EXPAND.U.64


Crossbar expand unsigned octlets


X.EXPAND.U.128


Crossbar expand unsigned hexlet


X.ROTL.2


Crossbar rotate left pecks


X.ROTL.4


Crossbar rotate left nibbles


X.ROTL.8


Crossbar rotate left bytes


X.ROTL.16


Crossbar rotate left doublets


X.ROTL.32 


Crossbar rotate left quadlets 


X.ROTL.64 


Crossbar rotate left octlets 


X.ROTL.128


Crossbar rotate left hexlet


X.ROTR.2


Crossbar rotate right pecks


X.ROTR.4


Crossbar rotate right nibbles


X.ROTR.8


Crossbar rotate right bytes


X.ROTR.16


Crossbar rotate right doublets 
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X.ROTR.32 


Crossbar rotate right quadlets 


X.ROTR.64 


Crossbar rotate right octlets 


X.ROTR.128 


Crossbar rotate right hexlet 


X.SHL.2 


Crossbar shift left pecks 


X.SHL.2.O


Crossbar shift left signed pecks check overflow


X.SHL.4


Crossbar shift left nibbles


X.SHL.4.O


Crossbar shift left signed nibbles check overflow


X.SHL.8


Crossbar shift left bytes


X.SHL.8.O


Crossbar shift left signed bytes check overflow


X.SHL.16


Crossbar shift left doublets


X.SHL.16.O


Crossbar shift left signed doublets check overflow


X.SHL.32


Crossbar shift left quadlets


X.SHL.32.O


Crossbar shift left signed quadlets check overflow


X.SHL.64


Crossbar shift left octlets


X.SHL.64.O


Crossbar shift left signed octlets check overflow


X.SHL.128


Crossbar shift left hexlet


X.SHL.128.O


Crossbar shift left signed hexlet check overflow


X.SHL.U.2.O


Crossbar shift left unsigned pecks check overflow


X.SHL.U.4.O


Crossbar shift left unsigned nibbles check overflow


X.SHL.U.8.O


Crossbar shift left unsigned bytes check overflow


X.SHL.U.16.O


Crossbar shift left unsigned doublets check overflow


X.SHL.U.32.O


Crossbar shift left unsigned quadlets check overflow


X.SHL.U.64.O


Crossbar shift left unsigned octlets check overflow


X.SHL.U.128.O


Crossbar shift left unsigned hexlet check overflow


X.SHR.2 


Crossbar signed shift right pecks 


X.SHR.4


Crossbar signed shift right nibbles


X.SHR.8


Crossbar signed shift right bytes


X.SHR.16 


Crossbar signed shift right doublets 


X.SHR.32 


Crossbar signed shift right quadlets 


X.SHR.64 


Crossbar signed shift right octlets 


X.SHR.128 


Crossbar signed shift right hexlet 


X.SHR.U.2 


Crossbar shift right unsigned pecks


X.SHR.U.4 


Crossbar shift right unsigned nibbles 


X.SHR.U.8 


Crossbar shift right unsigned bytes


X.SHR.U.16 


Crossbar shift right unsigned doublets 


X.SHR.U.32


Crossbar shift right unsigned quadlets 


X.SHR.U.64 


Crossbar shift right unsigned octlets 


X.SHR.U.128 


Crossbar shift right unsigned hexlet 
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Selection 



class 


op 


size 


precision 


EXPAND EXPAND.U 
COMPRESS COMPRESS U 


2 4 8 16 32 64 128 


shift 


ROTR ROTL SHR SHL
SHL.O SHL.U.O SHR.U 


2 4 8 16 32 64 128 



Format 

X.op.size rd=rc,rb 
rd=xopsize(rc,rb) 

31 25 24 23 18 17 12 11 6 5 2 1 0

| X.SHIFT | s | rd | rc | rb | op | sz |
7 1 6 6 6 4 2

lsize <- log(size)
s <- lsize_{2}
sz <- lsize_{1..0}



FIG. 32B 



Definition 

def Crossbar(op,size,rd,rc,rb) 
c <- RegRead(rc, 128) 
b <- RegRead(rb, 128) 
shift <- b and (size-1)
case op_{5..2} || 0^{2} of

X.COMPRESS:

hsize <- size/2

for i <- 0 to 64-hsize by hsize
if shift ≤ hsize then

a_{i+hsize-1..i} <- c_{i+i+shift+hsize-1..i+i+shift}

else

a_{i+hsize-1..i} <- (c_{i+i+size-1})^{shift-hsize} || c_{i+i+size-1..i+i+shift}

endif

endfor

a_{127..64} <- 0
X.COMPRESS.U:

hsize <- size/2

for i <- 0 to 64-hsize by hsize
if shift ≤ hsize then

a_{i+hsize-1..i} <- c_{i+i+shift+hsize-1..i+i+shift}

else

a_{i+hsize-1..i} <- 0^{shift-hsize} || c_{i+i+size-1..i+i+shift}

endif

endfor

a_{127..64} <- 0
X.EXPAND:

hsize <- size/2

for i <- 0 to 64-hsize by hsize
if shift ≤ hsize then

a_{i+i+size-1..i+i} <- (c_{i+hsize-1})^{hsize-shift} || c_{i+hsize-1..i} || 0^{shift}

else

a_{i+i+size-1..i+i} <- c_{i+size-shift-1..i} || 0^{shift}

endif
endfor 
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X.EXP AND.U: 

hsize <r- size/2 

for i <- 0 to 64-hsize by hsize 
if shift ≤ hsize then

a_{i+i+size-1..i+i} <- 0^{hsize-shift} || c_{i+hsize-1..i} || 0^{shift}

else

a_{i+i+size-1..i+i} <- c_{i+size-shift-1..i} || 0^{shift}

endif

endfor
X.ROTL:

for i <- 0 to 128-size by size

a_{i+size-1..i} <- c_{i+size-1-shift..i} || c_{i+size-1..i+size-1-shift}

endfor

X.ROTR:

for i <- 0 to 128-size by size

a_{i+size-1..i} <- c_{i+shift-1..i} || c_{i+size-1..i+shift}
endfor
X.SHL:

for i <- 0 to 128-size by size

a_{i+size-1..i} <- c_{i+size-1-shift..i} || 0^{shift}
endfor
X.SHL.O:

for i <- 0 to 128-size by size

if c_{i+size-1..i+size-1-shift} ≠ (c_{i+size-1-shift})^{shift+1} then
raise FixedPointArithmetic

endif

a_{i+size-1..i} <- c_{i+size-1-shift..i} || 0^{shift}
endfor



F/G. 32C-2 



X.SHL.U.O:

for i <- 0 to 128-size by size

if c_{i+size-1..i+size-shift} ≠ 0^{shift} then
raise FixedPointArithmetic

endif

a_{i+size-1..i} <- c_{i+size-1-shift..i} || 0^{shift}
endfor
X.SHR:

for i <- 0 to 128-size by size

a_{i+size-1..i} <- (c_{i+size-1})^{shift} || c_{i+size-1..i+shift}
endfor
X.SHR.U:

for i <- 0 to 128-size by size

a_{i+size-1..i} <- 0^{shift} || c_{i+size-1..i+shift}
endfor

endcase 

RegWrite(rd, 128, a) 
enddef 
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Tm>:1 1 1. VI 1 


V 







Compress 32 bits to 16, with 4-bit right shift 



FIG. 32D 



Format 



X.EXTRACT ra=rd,rc,rb



ra=xextract(rd,rc,rb) 

, 31 24_23 18 17 12 11 6 5 

"I rT 



1 °P I rd 1 rc 1 rb 
8 6 T T" 



FIG. 33A 



Definition 

def CrossbarExtract(op,ra,rb,rc,rd) as 



d <- RegRead(rd, 128)
c <- RegRead(rc, 128)
b <- RegRead(rb, 128)
case b_{8..0} of
0..255:
gsize <- 128
256..383:
gsize <- 64
384..447:
gsize <- 32
448..479:
gsize <- 16
480..495:
gsize <- 8
496..503:
gsize <- 4
504..507:
gsize <- 2
508..511:
gsize <- 1



endcase 
m <- b_{12}

as <- signed <- b_{14}
h <- (2-m)*gsize

spos <- (b_{8..0}) and ((2-m)*gsize-1)
dpos <- (0 || b_{23..16}) and (gsize-1)
sfsize <- (0 || b_{31..24}) and (gsize-1)

tfsize <- (sfsize = 0) or ((sfsize+dpos) > gsize) ? gsize-dpos : sfsize 
fsize «- (tfsize + spos > h) ? h - spos : tfsize 
for i <~ 0 to 128-gsize by gsize 
case op of 

X.EXTRACT: 
if m then 

p <- d_{gsize+i-1..i}

else

p <- (d || c)_{2*(gsize+i)-1..2*i}

endif

endcase

v <- (as & p_{h-1}) || p

w <- (as & v_{spos+fsize-1})^{gsize-fsize-dpos} || v_{fsize-1+spos..spos} || 0^{dpos}
if m then

a_{size-1+i..i} <- c_{gsize-1+i..dpos+fsize+i} || w_{dpos+fsize-1..dpos} || c_{dpos-1+i..i}

else

a_{size-1+i..i} <- w

endif 
endfor 

RegWrite(ra, 128, a) 
enddef 
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ab 



rd 



fsize 



Crossbar extract 
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fsize 



spos 




rd 



rc 



rb 



rd 



gsize 1 



fsize 



Crossbar merge extract 
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X. SHUFFLE .4 


Crossbar shuffle within pecks 


X.SHUFFLE.8


Crossbar shuffle within bytes 


X.SHUFFLE.16 


Crossbar shuffle within doublets 


X.SHUFFLE.32


Crossbar shuffle within quadlets 


X.SHUFFLE.64 


Crossbar shuffle within octlets 


X.SHUFFLE.128


Crossbar shuffle within hexlet 


X.SHUFFLE.256 


Crossbar shuffle within triclet 
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Format 



X.SHUFFLE.256 rd=rc,rb,v,w,h
X.SHUFFLE.size rd=rcb,v,w



rd=xshuffle256(rc,rb,v,w,h)
rd=xshufflesize(rcb,v,w)

31 24 23 18 17 12 11 6 5 0

| X.SHUFFLE | rd | rc | rb | op |

8 6 6 6 6



rc <- rb <- rcb 
x«-log2(size) 
y<-log2(v) 
z<-log2(w) 

op <- ((x*x*x-3*x*x-4*x)/6-(z*z-z)/2+x*z+y) + (size=256)*(h*32-56) 
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Definition 

def CrossbarShuffle(major,rd,rc,rb,op) 
c <r- RegRead(rc, 128) 
b <- RegRead(rb, 128) 
if rc=rb then 

case op of 
0..55: 

for x <- 2 to 7; for y <- 0 to x-2; for z <- 1 to x-y-1

if op = ((x*x*x-3*x*x-4*x)/6-(z*z-z)/2+x*z+y) then
for i <- 0 to 127

a_{i} <- c_{(i_{6..x} || i_{y+z-1..y} || i_{x-1..y+z} || i_{y-1..0})}

end

endif
endfor; endfor; endfor
56..63:

raise ReservedInstruction

endcase 

elseif 

case op_{4..0} of
0..27:

cb <- c || b

h <- op_{5}

for y <- 0 to x-2; for z <- 1 to x-y-1
if op_{4..0} = ((17*z-z*z)/2-8+y) then
for i <- h*128 to 127+h*128

a_{i-h*128} <- cb_{(i_{y+z-1..y} || i_{7..y+z} || i_{y-1..0})}

end

endif
endfor; endfor
28..31:

raise ReservedInstruction

endcase 

endif 

RegWrite(rd, 128, a) 
enddef 
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